From 1813a68457bb45b378d5bbec615b167deff3bcfc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:22:54 -0700 Subject: x86: Move alloc_desc_masks variables inside ifdef They are only useful with CONFIG_CPUMASK_OFFSTACK. Avoids hundreds of warnings with a gcc 4.6 -Wall build. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b4..fff1d77c3753 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -439,12 +439,12 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK gfp_t gfp = GFP_ATOMIC; if (boot) gfp = GFP_NOWAIT; -#ifdef CONFIG_CPUMASK_OFFSTACK if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; -- cgit v1.2.3 From e3239ff92a17976ac5d26fa0fe40ef3a9daf2523 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:06:41 +1000 Subject: memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Signed-off-by: Benjamin Herrenschmidt --- arch/arm/mm/init.c | 2 +- arch/arm/plat-omap/fb.c | 2 +- arch/microblaze/mm/init.c | 4 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/mem.c | 26 ++--- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- arch/sparc/mm/init_64.c | 6 +- drivers/video/omap2/vram.c | 2 +- include/linux/memblock.h | 24 ++--- mm/memblock.c | 168 +++++++++++++++---------------- 10 files changed, 118 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7185b00650fe..d1496e65dc2d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,7 +237,7 @@ static void __init arm_bootmem_free(struct meminfo *mi, unsigned long min, #ifndef CONFIG_SPARSEMEM int pfn_valid(unsigned long pfn) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; unsigned int left = 0, right = mem->cnt; do { diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index 0054b9501a53..05bf22827404 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -173,7 +173,7 @@ static int check_fbmem_region(int region_idx, struct omapfb_mem_region *rg, static int valid_sdram(unsigned long addr, unsigned long size) { - struct memblock_property res; + struct memblock_region res; res.base = addr; res.size = size; diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index db5934989926..afd6494fbbc6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -77,8 +77,8 @@ void __init setup_memory(void) /* Find main memory where is the kernel */ for (i = 0; i < memblock.memory.cnt; i++) { - memory_start = (u32) memblock.memory.region[i].base; - memory_end = (u32) memblock.memory.region[i].base + memory_start = (u32) memblock.memory.regions[i].base; + memory_end = (u32) memblock.memory.regions[i].base + (u32) memblock.memory.region[i].size; if ((memory_start <= (u32)_text) && ((u32)_text <= memory_end)) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 09dffe6efa46..b1a3784744db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -660,7 +660,7 @@ static void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < memblock.memory.cnt;
i++) { - base = (unsigned long)__va(memblock.memory.region[i].base); + base = (unsigned long)__va(memblock.memory.regions[i].base); size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a84a8d00005..a33f5c186fb7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -86,10 +86,10 @@ int page_is_ram(unsigned long pfn) for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = memblock.memory.region[i].base; + base = memblock.memory.regions[i].base; if ((paddr >= base) && - (paddr < (base + memblock.memory.region[i].size))) { + (paddr < (base + memblock.memory.regions[i].size))) { return 1; } } @@ -149,7 +149,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_property res; + struct memblock_region res; unsigned long pfn, len; u64 end; int ret = -1; @@ -206,7 +206,7 @@ void __init do_init_bootmem(void) /* Add active regions with valid PFNs */ for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -219,16 +219,16 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.region[i].base + + unsigned long addr = memblock.reserved.regions[i].base + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (memblock.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.regions[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.region[i].base; - reserve_bootmem(memblock.reserved.region[i].base, + memblock.reserved.regions[i].base; + reserve_bootmem(memblock.reserved.regions[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -237,7 +237,7 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); @@ -257,10 +257,10 @@ static int __init mark_nonram_nosave(void) for (i = 0; i < memblock.memory.cnt - 1; i++) { memblock_region_max_pfn = - (memblock.memory.region[i].base >> PAGE_SHIFT) + - (memblock.memory.region[i].size >> PAGE_SHIFT); + (memblock.memory.regions[i].base >> PAGE_SHIFT) + + (memblock.memory.regions[i].size >> PAGE_SHIFT); memblock_next_region_start_pfn = - memblock.memory.region[i+1].base >> PAGE_SHIFT; + memblock.memory.regions[i+1].base >> PAGE_SHIFT; if (memblock_region_max_pfn < memblock_next_region_start_pfn) register_nosave_region(memblock_region_max_pfn, diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 5cdcc7c8d973..8450c29e9b2f 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct 
memblock_property *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.region; /* * This is part of a workaround to allow the use of two diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f0434513df15..16d8bee889ba 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -978,7 +978,7 @@ static void __init add_node_ranges(void) unsigned long size = memblock_size_bytes(&memblock.memory, i); unsigned long start, end; - start = memblock.memory.region[i].base; + start = memblock.memory.regions[i].base; end = start + size; while (start < end) { unsigned long this_end; @@ -1299,7 +1299,7 @@ static void __init bootmem_init_nonnuma(void) if (!size) continue; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -1339,7 +1339,7 @@ static void __init trim_reserved_in_node(int nid) numadbg(" trim_reserved_in_node(%d)\n", nid); for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long start = memblock.reserved.region[i].base; + unsigned long start = memblock.reserved.regions[i].base; unsigned long size = memblock_size_bytes(&memblock.reserved, i); unsigned long end = start + size; diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index f6fdc2085f3e..0f2532bf0f04 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -554,7 +554,7 @@ void __init omap_vram_reserve_sdram_memblock(void) size = PAGE_ALIGN(size); if (paddr) { - struct memblock_property res; + struct memblock_region res; res.base = paddr; res.size = size; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a59faf2b5edd..86e7daf742f2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,22 +18,22 @@ #define MAX_MEMBLOCK_REGIONS 128 -struct memblock_property { +struct memblock_region { u64 base; u64 size; }; -struct memblock_region { +struct memblock_type { unsigned long cnt; u64 size; - struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; + struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; u64 rmo_size; - struct memblock_region memory; - struct memblock_region reserved; + struct memblock_type memory; + struct memblock_type reserved; }; extern struct memblock memblock; @@ -56,27 +56,27 @@ extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_property *res); +extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); static inline u64 -memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].size; + return type->regions[region_nr].size; } static inline u64 -memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +memblock_size_pages(struct memblock_type *type, unsigned long region_nr) { return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; } static inline u64 -memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].base >> PAGE_SHIFT; + return type->regions[region_nr].base >> 
PAGE_SHIFT; } static inline u64 -memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) { return memblock_start_pfn(type, region_nr) + memblock_size_pages(type, region_nr); diff --git a/mm/memblock.c b/mm/memblock.c index 43840b305ecb..6f407ccf604e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -29,7 +29,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -static void memblock_dump(struct memblock_region *region, char *name) +static void memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -37,8 +37,8 @@ static void memblock_dump(struct memblock_region *region, char *name) pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; + base = region->regions[i].base; + size = region->regions[i].size; pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", name, i, base, base + size - 1, size); @@ -74,34 +74,34 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) return 0; } -static long memblock_regions_adjacent(struct memblock_region *rgn, +static long memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; + u64 base1 = type->regions[r1].base; + u64 size1 = type->regions[r1].size; + u64 base2 = type->regions[r2].base; + u64 size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } -static void memblock_remove_region(struct memblock_region *rgn, unsigned long r) +static void memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; + for (i = r; i < type->cnt - 1; i++) { + type->regions[i].base = type->regions[i + 1].base; + type->regions[i].size = type->regions[i + 1].size; } - rgn->cnt--; + type->cnt--; } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_region *rgn, +static void memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { - rgn->region[r1].size += rgn->region[r2].size; - memblock_remove_region(rgn, r2); + type->regions[r1].size += type->regions[r2].size; + memblock_remove_region(type, r2); } void __init memblock_init(void) @@ -109,13 +109,13 @@ void __init memblock_init(void) /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ - memblock.memory.region[0].base = 0; - memblock.memory.region[0].size = 0; + memblock.memory.regions[0].base = 0; + memblock.memory.regions[0].size = 0; memblock.memory.cnt = 1; /* Ditto. 
*/ - memblock.reserved.region[0].base = 0; - memblock.reserved.region[0].size = 0; + memblock.reserved.regions[0].base = 0; + memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; } @@ -126,24 +126,24 @@ void __init memblock_analyze(void) memblock.memory.size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.region[i].size; + memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if ((type->cnt == 1) && (type->regions[0].size == 0)) { + type->regions[0].base = base; + type->regions[0].size = size; return 0; } /* First try and coalesce this MEMBLOCK with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -151,61 +151,59 @@ static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; + type->regions[i].base -= size; + type->regions[i].size += size; coalesced++; break; } else if (adjacent < 0) { - rgn->region[i].size += size; + type->regions[i].size += size; coalesced++; break; } } - if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) { - memblock_coalesce_regions(rgn, i, i+1); + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + memblock_coalesce_regions(type, i, i+1); coalesced++; } if (coalesced) return coalesced; - if (rgn->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= MAX_MEMBLOCK_REGIONS) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; + for (i = type->cnt - 1; i >= 0; i--) { + if (base < type->regions[i].base) { + type->regions[i+1].base = type->regions[i].base; + type->regions[i+1].size = type->regions[i].size; } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; + type->regions[i+1].base = base; + type->regions[i+1].size = size; break; } } - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if (base < type->regions[0].base) { + type->regions[0].base = base; + type->regions[0].size = size; } - rgn->cnt++; + type->cnt++; return 0; } long memblock_add(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.memory; - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. 
*/ if (base == 0) memblock.rmo_size = size; - return memblock_add_region(_rgn, base, size); + return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) { u64 rgnbegin, rgnend; u64 end = base + size; @@ -214,34 +212,34 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) rgnbegin = rgnend = 0; /* supress gcc warnings */ /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; + for (i=0; i < type->cnt; i++) { + rgnbegin = type->regions[i].base; + rgnend = rgnbegin + type->regions[i].size; if ((rgnbegin <= base) && (end <= rgnend)) break; } /* Didn't find the region */ - if (i == rgn->cnt) + if (i == type->cnt) return -1; /* Check to see if we are removing entire region */ if ((rgnbegin == base) && (rgnend == end)) { - memblock_remove_region(rgn, i); + memblock_remove_region(type, i); return 0; } /* Check to see if region is matching at the front */ if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; + type->regions[i].base = end; + type->regions[i].size -= size; return 0; } /* Check to see if the region is matching at the end */ if (rgnend == end) { - rgn->region[i].size -= size; + type->regions[i].size -= size; return 0; } @@ -249,8 +247,8 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) * We need to split the entry - adjust the current one to the * beginging of the hole and add the region after hole. */ - rgn->region[i].size = base - rgn->region[i].base; - return memblock_add_region(rgn, end, rgnend - end); + type->regions[i].size = base - type->regions[i].base; + return memblock_add_region(type, end, rgnend - end); } long memblock_remove(u64 base, u64 size) @@ -265,25 +263,25 @@ long __init memblock_free(u64 base, u64 size) long __init memblock_reserve(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.reserved; + struct memblock_type *_rgn = &memblock.reserved; BUG_ON(0 == size); return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) { unsigned long i; - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } - return (i < rgn->cnt) ? i : -1; + return (i < type->cnt) ? 
i : -1; } static u64 memblock_align_down(u64 addr, u64 size) @@ -311,7 +309,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, base = ~(u64)0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -320,7 +318,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, return ~(u64)0; } -static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, +static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { @@ -350,7 +348,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64 start, u64 end, int *nid)) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; int i; BUG_ON(0 == size); @@ -358,7 +356,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->region[i], + u64 ret = memblock_alloc_nid_region(&mem->regions[i], nid_range, size, align, nid); if (ret != ~(u64)0) @@ -402,8 +400,8 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) max_addr = MEMBLOCK_REAL_LIMIT; for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.region[i].base; - u64 memblocksize = memblock.memory.region[i].size; + u64 memblockbase = memblock.memory.regions[i].base; + u64 memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; @@ -423,7 +421,7 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) return 0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -442,7 +440,7 @@ u64 memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; - return (memblock.memory.region[idx].base + memblock.memory.region[idx].size); + return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); } /* You must call memblock_analyze() after this. */ @@ -450,7 +448,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) { unsigned long i; u64 limit; - struct memblock_property *p; + struct memblock_region *p; if (!memory_limit) return; @@ -458,24 +456,24 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) /* Truncate the memblock regions to satisfy the memory limit. */ limit = memory_limit; for (i = 0; i < memblock.memory.cnt; i++) { - if (limit > memblock.memory.region[i].size) { - limit -= memblock.memory.region[i].size; + if (limit > memblock.memory.regions[i].size) { + limit -= memblock.memory.regions[i].size; continue; } - memblock.memory.region[i].size = limit; + memblock.memory.regions[i].size = limit; memblock.memory.cnt = i + 1; break; } - if (memblock.memory.region[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.region[0].size; + if (memblock.memory.regions[0].size < memblock.rmo_size) + memblock.rmo_size = memblock.memory.regions[0].size; memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. 
*/ for (i = 0; i < memblock.reserved.cnt; i++) { - p = &memblock.reserved.region[i]; + p = &memblock.reserved.regions[i]; if (p->base > memory_limit) p->size = 0; @@ -494,9 +492,9 @@ int __init memblock_is_reserved(u64 addr) int i; for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.region[i].base + - memblock.reserved.region[i].size - 1; - if ((addr >= memblock.reserved.region[i].base) && (addr <= upper)) + u64 upper = memblock.reserved.regions[i].base + + memblock.reserved.regions[i].size - 1; + if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) return 1; } return 0; @@ -511,7 +509,7 @@ int memblock_is_region_reserved(u64 base, u64 size) * Given a <base, len>, find which memory regions belong to this range. * Adjust the request and return a contiguous chunk. */ -int memblock_find(struct memblock_property *res) +int memblock_find(struct memblock_region *res) { int i; u64 rstart, rend; @@ -520,8 +518,8 @@ int memblock_find(struct memblock_property *res) rend = rstart + res->size - 1; for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.region[i].base; - u64 end = start + memblock.memory.region[i].size - 1; + u64 start = memblock.memory.regions[i].base; + u64 end = start + memblock.memory.regions[i].size - 1; if (start > rend) return -1; -- cgit v1.2.3 From 411a25a80da328f5ae6b6c037872ffe867fcc130 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:56 -0700 Subject: memblock: No reason to include asm/memblock.h late Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 86e7daf742f2..4b6931327b22 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/mm.h> +#include <asm/memblock.h> + #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { @@ -82,8 +84,6 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } -#include <asm/memblock.h> - #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 72d4b0b4e0e7fa858767e03972771a9f7c02b689 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:38:47 +1000 Subject: memblock: Implement memblock_is_memory and memblock_is_region_memory To make it fast, we steal ARM's binary search for memblock_is_memory() and we use that to also replace the existing implementation of memblock_is_reserved().
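The bisection works because memblock_add_region() keeps each region array sorted by base address and coalesces adjacent entries, so both arrays are sorted and non-overlapping by construction. A minimal userspace sketch of the same lookup, with simplified stand-in types rather than the kernel structures:

    #include <stdint.h>
    #include <stdio.h>

    struct region { uint64_t base, size; };

    /* Same invariant as memblock: sorted by base, non-overlapping */
    static int search(const struct region *r, unsigned int cnt, uint64_t addr)
    {
            unsigned int left = 0, right = cnt;

            do {
                    unsigned int mid = (left + right) / 2;

                    if (addr < r[mid].base)
                            right = mid;
                    else if (addr >= r[mid].base + r[mid].size)
                            left = mid + 1;
                    else
                            return mid;
            } while (left < right);
            return -1;
    }

    int main(void)
    {
            const struct region memory[] = { { 0x0, 0x1000 }, { 0x4000, 0x2000 } };

            printf("%d %d %d\n",
                   search(memory, 2, 0x800),    /* 0: inside the first region */
                   search(memory, 2, 0x2000),   /* -1: falls in the hole */
                   search(memory, 2, 0x5fff));  /* 1: last byte of the second region */
            return 0;
    }

memblock_is_memory() and memblock_is_reserved() then reduce to "did the search find an index", while the region-wide variant additionally checks that the whole [base, base+size) range is contained in the region found.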
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4b6931327b22..47bceb187058 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -56,6 +56,8 @@ extern u64 __init __memblock_alloc_base(u64 size, extern u64 __init memblock_phys_mem_size(void); extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int memblock_is_memory(u64 addr); +extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); extern int memblock_find(struct memblock_region *res); diff --git a/mm/memblock.c b/mm/memblock.c index 6f407ccf604e..aa88c62bce7f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -487,17 +487,43 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } +static int memblock_search(struct memblock_type *type, u64 addr) +{ + unsigned int left = 0, right = type->cnt; + + do { + unsigned int mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else + return mid; + } while (left < right); + return -1; } + int __init memblock_is_reserved(u64 addr) { - int i; + return memblock_search(&memblock.reserved, addr) != -1; +} - for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.regions[i].base + - memblock.reserved.regions[i].size - 1; - if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) - return 1; - } - return 0; +int memblock_is_memory(u64 addr) +{ + return memblock_search(&memblock.memory, addr) != -1; +} + +int memblock_is_region_memory(u64 base, u64 size) +{ + int idx = memblock_search(&memblock.memory, base); + + if (idx == -1) + return 0; + return memblock.memory.regions[idx].base <= base && + (memblock.memory.regions[idx].base + + memblock.memory.regions[idx].size) >= (base + size); } int memblock_is_region_reserved(u64 base, u64 size) -- cgit v1.2.3 From 5b385f259fa4d356452e3b4729cbaf5213f4f55b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:40:38 +1000 Subject: memblock: Introduce for_each_memblock() and new accessors Walk memblocks using for_each_memblock() and use memblock_region_base/end_pfn() for getting to PFNs. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47bceb187058..c914112cd24f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,6 +64,7 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); +/* Obsolete accessors */ static inline u64 memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { @@ -86,6 +87,57 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } +/* + * pfn conversion functions + * + * While the memory MEMBLOCKs should always be page aligned, the reserved + * MEMBLOCKs may not be. These accessors attempt to provide a very clear + * idea of what they return for such non-aligned MEMBLOCKs.
+ */ + +/** + * memblock_region_base_pfn - Return the lowest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_base_pfn(const struct memblock_region *reg) +{ + return reg->base >> PAGE_SHIFT; +} + +/** + * memblock_region_last_pfn - Return the highest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_last_pfn(const struct memblock_region *reg) +{ + return (reg->base + reg->size - 1) >> PAGE_SHIFT; +} + +/** + * memblock_region_end_pfn - Return the pfn of the first page following the region + * but not intersecting it + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_end_pfn(const struct memblock_region *reg) +{ + return memblock_region_last_pfn(reg) + 1; +} + +/** + * memblock_region_pages - Return the number of pages covering a region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_pages(const struct memblock_region *reg) +{ + return memblock_region_end_pfn(reg) - memblock_region_base_pfn(reg); +} + +#define for_each_memblock(memblock_type, region) \ + for (region = memblock.memblock_type.regions; \ + region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ + region++) + + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 1e2b904026e9debf95f500b8980a00c43ac0f31c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:25 +1000 Subject: memblock: Remove obsolete accessors Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c914112cd24f..7d70fdd43db4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,29 +64,6 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); -/* Obsolete accessors */ -static inline u64 -memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].size; -} -static inline u64 -memblock_size_pages(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_start_pfn(type, region_nr) + - memblock_size_pages(type, region_nr); -} - /* * pfn conversion functions * -- cgit v1.2.3 From b693fffb189fbfe7e1e8317ce5838808be8666a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:55 +1000 Subject: memblock: Remove memblock_find() Nobody uses it anymore. Its semantics were ...
weird Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 - mm/memblock.c | 32 -------------------------------- 2 files changed, 33 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7d70fdd43db4..776c7d945dcc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -60,7 +60,6 @@ extern int memblock_is_memory(u64 addr); extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); diff --git a/mm/memblock.c b/mm/memblock.c index aa88c62bce7f..8a118b71cbec 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -531,35 +531,3 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -/* - * Given a <base, len>, find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int memblock_find(struct memblock_region *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.regions[i].base; - u64 end = start + memblock.memory.regions[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} -- cgit v1.2.3 From 35a1f0bd07015dde66501b47cfb6ddc72ebe7346 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:58 -0700 Subject: memblock: Remove nid_range argument, arch provides memblock_nid_range() instead Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 16 ++++++----------- include/linux/memblock.h | 7 +++++-- mm/memblock.c | 13 ++++++++----- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index dd68025ecdbe..0883113624b9 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -785,8 +785,7 @@ static int find_node(unsigned long addr) return -1; } -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -804,8 +803,7 @@ static unsigned long long nid_range(unsigned long long start, return start; } #else -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = 0; return end; @@ -822,8 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), - SMP_CACHE_BYTES, nid, nid_range); + paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -843,8 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, - nid_range); + paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot
allocate bootmap for nid[%d]\n", nid); @@ -984,7 +980,7 @@ static void __init add_node_ranges(void) unsigned long this_end; int nid; - this_end = nid_range(start, end, &nid); + this_end = memblock_nid_range(start, end, &nid); numadbg("Adding active range nid[%d] " "start[%lx] end[%lx]\n", @@ -1317,7 +1313,7 @@ static void __init reserve_range_in_node(int nid, unsigned long start, unsigned long this_end; int n; - this_end = nid_range(start, end, &n); + this_end = memblock_nid_range(start, end, &n); if (n == nid) { numadbg(" MATCH reserving range [%lx:%lx]\n", start, this_end); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 776c7d945dcc..367dea6e95a0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,8 +46,7 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); @@ -63,6 +62,10 @@ extern int memblock_is_region_reserved(u64 base, u64 size); extern void memblock_dump_all(void); +/* Provided by the architecture */ +extern u64 memblock_nid_range(u64 start, u64 end, int *nid); + + /* * pfn conversion functions * diff --git a/mm/memblock.c b/mm/memblock.c index 8a118b71cbec..13807f280ada 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -319,7 +319,6 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, } static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { u64 start, end; @@ -332,7 +331,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 this_end; int this_nid; - this_end = nid_range(start, end, &this_nid); + this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { u64 ret = memblock_alloc_nid_unreserved(start, this_end, size, align); @@ -345,8 +344,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, return ~(u64)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) +u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -357,7 +355,6 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, for (i = 0; i < mem->cnt; i++) { u64 ret = memblock_alloc_nid_region(&mem->regions[i], - nid_range, size, align, nid); if (ret != ~(u64)0) return ret; @@ -531,3 +528,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } +u64 __weak memblock_nid_range(u64 start, u64 end, int *nid) +{ + *nid = 0; + + return end; +} -- cgit v1.2.3 From 27f574c223d2c09610058b3ec7a29582d63a3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:00 -0700 Subject: memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- include/linux/memblock.h | 1 + mm/memblock.c | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4072b871497d..a542ff5ec8a9 100644 --- 
a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -625,7 +625,7 @@ static void __init htab_initialize(void) if (machine_is(cell)) limit = 0x80000000; else - limit = 0; + limit = MEMBLOCK_ALLOC_ANYWHERE; table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 367dea6e95a0..3cf3304e901d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,6 +50,7 @@ extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); +#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); diff --git a/mm/memblock.c b/mm/memblock.c index e264e8c70892..0131684c42f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,8 +15,6 @@ #include #include -#define MEMBLOCK_ALLOC_ANYWHERE 0 - struct memblock memblock; static int memblock_debug; -- cgit v1.2.3 From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduces memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that a memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations.
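Concretely, the resolution this patch adds boils down to a couple of lines; here is a compilable sketch with a stand-in for the one memblock field involved (illustration only, the real logic sits in __memblock_alloc_base() below):

    #include <stdint.h>
    #include <stdio.h>

    #define MEMBLOCK_ALLOC_ANYWHERE   (~(uint64_t)0)
    #define MEMBLOCK_ALLOC_ACCESSIBLE 0

    /* Stand-in for the one field of struct memblock used here */
    static struct { uint64_t current_limit; } memblock = {
            .current_limit = 0x10000000,    /* e.g. 256MB mapped early */
    };

    /* Mirrors the max_addr handling added to __memblock_alloc_base() */
    static uint64_t resolve_max_addr(uint64_t max_addr)
    {
            if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE)
                    return memblock.current_limit;  /* honor the boot-time limit */
            return max_addr;        /* explicit cap, or ~0 to allocate anywhere */
    }

    int main(void)
    {
            printf("%#llx\n", (unsigned long long)resolve_max_addr(MEMBLOCK_ALLOC_ACCESSIBLE));
            printf("%#llx\n", (unsigned long long)resolve_max_addr(MEMBLOCK_ALLOC_ANYWHERE));
            printf("%#llx\n", (unsigned long long)resolve_max_addr(0x01000000));
            return 0;
    }

Since memblock_alloc() now passes MEMBLOCK_ALLOC_ACCESSIBLE, plain allocations respect current_limit, while memblock_alloc_base(..., MEMBLOCK_ALLOC_ANYWHERE) still escapes it.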
Signed-off-by: Benjamin Herrenschmidt --- arch/microblaze/include/asm/memblock.h | 3 --- arch/powerpc/include/asm/memblock.h | 7 ------- arch/powerpc/kernel/prom.c | 20 +++++++++++++++++++- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/mm/40x_mmu.c | 5 +++-- arch/powerpc/mm/fsl_booke_mmu.c | 3 ++- arch/powerpc/mm/hash_utils_64.c | 3 ++- arch/powerpc/mm/init_32.c | 29 +++++++---------------------- arch/powerpc/mm/ppc_mmu_32.c | 3 +-- arch/powerpc/mm/tlb_nohash.c | 2 ++ arch/sh/include/asm/memblock.h | 2 -- arch/sparc/include/asm/memblock.h | 2 -- include/linux/memblock.h | 16 +++++++++++++++- mm/memblock.c | 19 +++++++++++-------- 14 files changed, 63 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h index f9c2fa331d2a..20a8e257c77f 100644 --- a/arch/microblaze/include/asm/memblock.h +++ b/arch/microblaze/include/asm/memblock.h @@ -9,9 +9,6 @@ #ifndef _ASM_MICROBLAZE_MEMBLOCK_H #define _ASM_MICROBLAZE_MEMBLOCK_H -/* MEMBLOCK limit is OFF */ -#define MEMBLOCK_REAL_LIMIT 0xFFFFFFFF - #endif /* _ASM_MICROBLAZE_MEMBLOCK_H */ diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b1..43efc345065e 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..3aec0b980f6a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -655,6 +655,21 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ +static void set_boot_memory_limit(void) +{ +#ifdef CONFIG_PPC32 + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(0x01000000); + /* 8xx can only access 8MB at the moment */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) + memblock_set_current_limit(0x00800000); + else + memblock_set_current_limit(0x10000000); +#else + memblock_set_current_limit(memblock.rmo_size); +#endif +} void __init early_init_devtree(void *params) { @@ -683,6 +698,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -718,6 +734,8 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + set_boot_memory_limit(); + /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? 
*/ move_device_tree(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a10ffc85ada7..b7eb1ded3b5f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 1dc2fa5ce1bd..58969b51f454 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include + #include "mmu_decl.h" extern int __map_without_ltlbs; @@ -139,8 +141,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - - __initial_memory_limit_addr = memstart_addr + mapped; + memblock_set_current_limit(memstart_addr + mapped); return mapped; } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index cdc7526e9c93..e525f862d759 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -212,5 +213,5 @@ void __init adjust_total_lowmem(void) pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - __initial_memory_limit_addr = memstart_addr + __max_low_memory; + memblock_set_current_limit(memstart_addr + __max_low_memory); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a542ff5ec8a9..b05890e23813 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -696,7 +696,8 @@ static void __init htab_initialize(void) #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); - } + } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* * If we have a memory_limit and we've allocated TCEs then we need to diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6a6975dc2654..59b208b7ec6f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -91,12 +91,6 @@ int __allow_ioremap_reserved; /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * address of the limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000; - /* * Check for command-line options that affect what MMU_init will do. 
*/ @@ -126,13 +120,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit_addr = 0x01000000; - /* 8xx can only access 8MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - __initial_memory_limit_addr = 0x00800000; - /* parse args from command line */ MMU_setup(); @@ -190,20 +177,18 @@ void __init MMU_init(void) #ifdef CONFIG_BOOTX_TEXT btext_unmap(); #endif + + /* Shortly after that, the entire linear mapping will be available */ + memblock_set_current_limit(lowmem_end_addr); } /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit_addr)); - } - return p; + if (init_bootmem_done) + return alloc_bootmem_pages(PAGE_SIZE); + else + return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } /* Free up now-unused memory */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f8a01829d64f..7d34e170e80f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -223,8 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc_base(Hash_size, Hash_size, - __initial_memory_limit_addr)); + Hash = __va(memblock_alloc(Hash_size, Hash_size)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d8695b02a968..7ba32e762990 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -432,6 +432,8 @@ static void __early_init_mmu(int boot_cpu) * the MMU configuration */ mb(); + + memblock_set_current_limit(linear_map_top); } void __init early_init_mmu(void) diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h index dfe683b88075..e87063fad2ea 100644 --- a/arch/sh/include/asm/memblock.h +++ b/arch/sh/include/asm/memblock.h @@ -1,6 +1,4 @@ #ifndef __ASM_SH_MEMBLOCK_H #define __ASM_SH_MEMBLOCK_H -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* __ASM_SH_MEMBLOCK_H */ diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h index f12af880649b..c67b047ef85e 100644 --- a/arch/sparc/include/asm/memblock.h +++ b/arch/sparc/include/asm/memblock.h @@ -5,6 +5,4 @@ #define MEMBLOCK_DBG(fmt...) 
prom_printf(fmt) -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* !(_SPARC64_MEMBLOCK_H) */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3cf3304e901d..c4f6e53264ed 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -34,6 +34,7 @@ struct memblock_type { struct memblock { unsigned long debug; u64 rmo_size; + u64 current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -46,11 +47,16 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); + extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); + +/* Flags for memblock_alloc_base() and __memblock_alloc_base() */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); -#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); @@ -66,6 +72,14 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +/** + * memblock_set_current_limit - Set the current allocation limit to allow + * limiting allocations to what is currently + * accessible during boot + * @limit: New limit value (physical address) + */ +extern void memblock_set_current_limit(u64 limit); + /* * pfn conversion functions diff --git a/mm/memblock.c b/mm/memblock.c index 0131684c42f8..770c5bfac2cd 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -115,6 +115,8 @@ void __init memblock_init(void) memblock.reserved.regions[0].base = 0; memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; + + memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; } void __init memblock_analyze(void) @@ -373,7 +375,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) u64 __init memblock_alloc(u64 size, u64 align) { - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) @@ -399,14 +401,9 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) size = memblock_align_up(size, align); - /* On some platforms, make sure we allocate lowmem */ - /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = MEMBLOCK_REAL_LIMIT; - /* Pump up max_addr */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = ~(u64)0; + if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE) + max_addr = memblock.current_limit; /* We do a top-down search, this tends to limit memory * fragmentation by keeping early boot allocs near the @@ -527,3 +524,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } + +void __init memblock_set_current_limit(u64 limit) +{ + memblock.current_limit = limit; +} + -- cgit v1.2.3 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, bury it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of
memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mmu.h | 12 ++++++++++++ arch/powerpc/kernel/head_40x.S | 6 +----- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 29 ++++++++--------------------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/40x_mmu.c | 14 +++++++++++++- arch/powerpc/mm/44x_mmu.c | 14 ++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 22 +++++++++++++++++++++- arch/powerpc/mm/init_32.c | 14 ++++++++++++++ arch/powerpc/mm/init_64.c | 1 + arch/powerpc/mm/ppc_mmu_32.c | 15 +++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 14 ++++++++++++++ include/linux/memblock.h | 1 - mm/memblock.c | 8 -------- 16 files changed, 125 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a2..bb40a06d3b77 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include + #include #include @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b485..8278e8bad5a0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f4..b9ffd7deeed7 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. 
*/ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3aec0b980f6a..c3c6a8857544 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -492,7 +493,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +502,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,22 +660,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ -static void set_boot_memory_limit(void) -{ -#ifdef CONFIG_PPC32 - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - memblock_set_current_limit(0x01000000); - /* 8xx can only access 8MB at the moment */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - memblock_set_current_limit(0x00800000); - else - memblock_set_current_limit(0x10000000); -#else - memblock_set_current_limit(memblock.rmo_size); -#endif -} - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,8 +723,6 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); - set_boot_memory_limit(); - /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee762..1662777be5dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -934,7 +934,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f6..4360944b60f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -487,7 +487,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. 
*/ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 58969b51f454..5810967511d4 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -141,7 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(memstart_addr + mapped); + memblock_set_current_limit(mapped); return mapped; } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index d8c6efb32bc6..024acab588fd 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,6 +24,8 @@ */ #include +#include + #include #include #include @@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top) return total_lowmem; } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 44x has a 256M TLB entry pinned at boot */ + memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); +} + #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index e525f862d759..0be8fe24c54e 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -215,3 +215,12 @@ void __init adjust_total_lowmem(void) memblock_set_current_limit(memstart_addr + __max_low_memory); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b05890e23813..83f534d862db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -649,7 +649,7 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, memblock.rmo_size)); + 1, ppc64_rma_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -1248,3 +1248,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* On LPAR systems, the first entry is our RMA region, + * non-LPAR 64-bit hash MMU systems don't have a limitation + * on real mode access, but using the first entry works well + * enough. 
We also clamp it to 1G to avoid some funky things + * such as RTAS bugs etc... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 59b208b7ec6f..742da43b4ab6 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -237,3 +237,17 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif + +#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 8MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} +#endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 71f1415e2472..9e081ffbf0f2 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -328,3 +328,4 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 7d34e170e80f..11571e118831 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -271,3 +271,18 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); + else /* Anything else has 256M mapped */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 7ba32e762990..a086ed562606 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -446,4 +446,18 @@ void __cpuinit early_init_mmu_secondary(void) __early_init_mmu(0); } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... 
+ */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); +} #endif /* CONFIG_PPC64 */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c4f6e53264ed..71b8edc6ede8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -33,7 +33,6 @@ struct memblock_type { struct memblock { unsigned long debug; - u64 rmo_size; u64 current_limit; struct memblock_type memory; struct memblock_type reserved; diff --git a/mm/memblock.c b/mm/memblock.c index 770c5bfac2cd..73d903ebf3d4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,6 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size); pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); memblock_dump(&memblock.memory, "memory"); @@ -195,10 +194,6 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) long memblock_add(u64 base, u64 size) { - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */ - if (base == 0) - memblock.rmo_size = size; - return memblock_add_region(&memblock.memory, base, size); } @@ -459,9 +454,6 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) break; } - if (memblock.memory.regions[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.regions[0].size; - memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. */ -- cgit v1.2.3 From 2898cc4cdf208f15246b7a1c6951d2b126a70fd6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:34:42 +1000 Subject: memblock: Change u64 to phys_addr_t Let's not waste space and cycles on archs that don't support >32-bit physical address space. 
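For reference, the typedef this conversion leans on (from include/linux/types.h) is:

	#ifdef CONFIG_PHYS_ADDR_T_64BIT
	typedef u64 phys_addr_t;
	#else
	typedef u32 phys_addr_t;
	#endif

so on a 32-bit configuration without CONFIG_PHYS_ADDR_T_64BIT the region arrays and all of the address arithmetic drop to 32-bit. Callers should likewise keep memblock addresses in phys_addr_t; a minimal (hypothetical) example:

	static phys_addr_t __init early_table_alloc(void)
	{
		/* returns a physical address sized for this platform */
		return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	}
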
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 48 +++++++++---------- mm/memblock.c | 118 ++++++++++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 71b8edc6ede8..b65045a4ed08 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,19 +21,19 @@ #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { - u64 base; - u64 size; + phys_addr_t base; + phys_addr_t size; }; struct memblock_type { unsigned long cnt; - u64 size; + phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; - u64 current_limit; + phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -42,34 +42,34 @@ extern struct memblock memblock; extern void __init memblock_init(void); extern void __init memblock_analyze(void); -extern long memblock_add(u64 base, u64 size); -extern long memblock_remove(u64 base, u64 size); -extern long __init memblock_free(u64 base, u64 size); -extern long __init memblock_reserve(u64 base, u64 size); +extern long memblock_add(phys_addr_t base, phys_addr_t size); +extern long memblock_remove(phys_addr_t base, phys_addr_t size); +extern long __init memblock_free(phys_addr_t base, phys_addr_t size); +extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); -extern u64 __init memblock_alloc(u64 size, u64 align); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern u64 __init memblock_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __memblock_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init memblock_phys_mem_size(void); -extern u64 memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(u64 memory_limit); -extern int memblock_is_memory(u64 addr); -extern int memblock_is_region_memory(u64 base, u64 size); -extern int __init memblock_is_reserved(u64 addr); -extern int memblock_is_region_reserved(u64 base, u64 size); +extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, + phys_addr_t, phys_addr_t max_addr); +extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr); +extern phys_addr_t __init memblock_phys_mem_size(void); +extern phys_addr_t memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern int memblock_is_memory(phys_addr_t addr); +extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +extern int __init memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); /* Provided by the architecture */ -extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -77,7 +77,7 @@ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); * accessible during boot * @limit: New limit value 
(physical address) */ -extern void memblock_set_current_limit(u64 limit); +extern void memblock_set_current_limit(phys_addr_t limit); /* diff --git a/mm/memblock.c b/mm/memblock.c index 73d903ebf3d4..81da63592a68 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -55,13 +55,14 @@ void memblock_dump_all(void) memblock_dump(&memblock.reserved, "reserved"); } -static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) +static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) +static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) return 1; @@ -72,12 +73,12 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) } static long memblock_regions_adjacent(struct memblock_type *type, - unsigned long r1, unsigned long r2) + unsigned long r1, unsigned long r2) { - u64 base1 = type->regions[r1].base; - u64 size1 = type->regions[r1].size; - u64 base2 = type->regions[r2].base; - u64 size2 = type->regions[r2].size; + phys_addr_t base1 = type->regions[r1].base; + phys_addr_t size1 = type->regions[r1].size; + phys_addr_t base2 = type->regions[r2].base; + phys_addr_t size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } @@ -128,7 +129,7 @@ void __init memblock_analyze(void) memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -141,8 +142,8 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) /* First try and coalesce this MEMBLOCK with another. 
*/ for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -192,16 +193,16 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) return 0; } -long memblock_add(u64 base, u64 size) +long memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { - u64 rgnbegin, rgnend; - u64 end = base + size; + phys_addr_t rgnbegin, rgnend; + phys_addr_t end = base + size; int i; rgnbegin = rgnend = 0; /* supress gcc warnings */ @@ -246,17 +247,17 @@ static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(u64 base, u64 size) +long memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } -long __init memblock_free(u64 base, u64 size) +long __init memblock_free(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.reserved, base, size); } -long __init memblock_reserve(u64 base, u64 size) +long __init memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; @@ -265,13 +266,13 @@ long __init memblock_reserve(u64 base, u64 size) return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } @@ -279,20 +280,20 @@ long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) return (i < type->cnt) ? 
i : -1; } -static u64 memblock_align_down(u64 addr, u64 size) +static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static u64 memblock_align_up(u64 addr, u64 size) +static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static u64 __init memblock_alloc_region(u64 start, u64 end, - u64 size, u64 align) +static phys_addr_t __init memblock_alloc_region(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align) { - u64 base, res_base; + phys_addr_t base, res_base; long j; base = memblock_align_down((end - size), align); @@ -301,7 +302,7 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, if (j < 0) { /* this area isn't reserved, take it */ if (memblock_add_region(&memblock.reserved, base, size) < 0) - base = ~(u64)0; + base = ~(phys_addr_t)0; return base; } res_base = memblock.reserved.regions[j].base; @@ -310,42 +311,43 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, base = memblock_align_down(res_base - size, align); } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __weak __init memblock_nid_range(u64 start, u64 end, int *nid) +phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { *nid = 0; return end; } -static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 size, u64 align, int nid) +static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, + phys_addr_t size, + phys_addr_t align, int nid) { - u64 start, end; + phys_addr_t start, end; start = mp->base; end = start + mp->size; start = memblock_align_up(start, align); while (start < end) { - u64 this_end; + phys_addr_t this_end; int this_nid; this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { - u64 ret = memblock_alloc_region(start, this_end, size, align); - if (ret != ~(u64)0) + phys_addr_t ret = memblock_alloc_region(start, this_end, size, align); + if (ret != ~(phys_addr_t)0) return ret; } start = this_end; } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) +phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -359,23 +361,23 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->regions[i], + phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); - if (ret != ~(u64)0) + if (ret != ~(phys_addr_t)0) return ret; } return memblock_alloc(size, align); } -u64 __init memblock_alloc(u64 size, u64 align) +phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { - u64 alloc; + phys_addr_t alloc; alloc = __memblock_alloc_base(size, align, max_addr); @@ -386,11 +388,11 @@ u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) return alloc; } -u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { long i; - u64 base = 0; - u64 res_base; + phys_addr_t base = 0; + phys_addr_t res_base; BUG_ON(0 == size); @@ -405,26 
+407,26 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) * top of memory */ for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.regions[i].base; - u64 memblocksize = memblock.memory.regions[i].size; + phys_addr_t memblockbase = memblock.memory.regions[i].base; + phys_addr_t memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; base = min(memblockbase + memblocksize, max_addr); res_base = memblock_alloc_region(memblockbase, base, size, align); - if (res_base != ~(u64)0) + if (res_base != ~(phys_addr_t)0) return res_base; } return 0; } /* You must call memblock_analyze() before this. */ -u64 __init memblock_phys_mem_size(void) +phys_addr_t __init memblock_phys_mem_size(void) { return memblock.memory.size; } -u64 memblock_end_of_DRAM(void) +phys_addr_t memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -432,10 +434,10 @@ u64 memblock_end_of_DRAM(void) } /* You must call memblock_analyze() after this. */ -void __init memblock_enforce_memory_limit(u64 memory_limit) +void __init memblock_enforce_memory_limit(phys_addr_t memory_limit) { unsigned long i; - u64 limit; + phys_addr_t limit; struct memblock_region *p; if (!memory_limit) @@ -472,7 +474,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } -static int memblock_search(struct memblock_type *type, u64 addr) +static int memblock_search(struct memblock_type *type, phys_addr_t addr) { unsigned int left = 0, right = type->cnt; @@ -490,17 +492,17 @@ static int memblock_search(struct memblock_type *type, u64 addr) return -1; } -int __init memblock_is_reserved(u64 addr) +int __init memblock_is_reserved(phys_addr_t addr) { return memblock_search(&memblock.reserved, addr) != -1; } -int memblock_is_memory(u64 addr) +int memblock_is_memory(phys_addr_t addr) { return memblock_search(&memblock.memory, addr) != -1; } -int memblock_is_region_memory(u64 base, u64 size) +int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.reserved, base); @@ -511,13 +513,13 @@ int memblock_is_region_memory(u64 base, u64 size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(u64 base, u64 size) +int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -void __init memblock_set_current_limit(u64 limit) +void __init memblock_set_current_limit(phys_addr_t limit) { memblock.current_limit = limit; } -- cgit v1.2.3 From 9d3c30f5a17ec35894eadb7171f724643dce19c3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:04 -0700 Subject: memblock: Remove unused memblock.debug struct member Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b65045a4ed08..0fe6dd56a4b4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -32,7 +32,6 @@ struct memblock_type { }; struct memblock { - unsigned long debug; phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; @@ -55,9 +54,11 @@ extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); #define MEMBLOCK_ALLOC_ACCESSIBLE 0 extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, - phys_addr_t, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init 
__memblock_alloc_base(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init memblock_phys_mem_size(void); extern phys_addr_t memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From 4734b594c6ca1be796d30c82d93fdf5160f45124 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 14:31:29 +1000 Subject: memblock: Remove memblock_type.size and add memblock.memory_size instead Right now, both the "memory" and "reserved" memblock_type structures have a "size" member. It represents the calculated memory size in the former case and is unused in the latter. This moves it out to the main memblock structure instead Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 2 +- include/linux/memblock.h | 2 +- mm/memblock.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52df5428ece4..f661f6c527da 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -301,7 +301,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = memblock.memory.size >> PAGE_SHIFT; + num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0fe6dd56a4b4..c9c7b0f344a5 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -27,12 +27,12 @@ struct memblock_region { struct memblock_type { unsigned long cnt; - phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { phys_addr_t current_limit; + phys_addr_t memory_size; /* Updated by memblock_analyze() */ struct memblock_type memory; struct memblock_type reserved; }; diff --git a/mm/memblock.c b/mm/memblock.c index 81da63592a68..5ae413e9afd8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,7 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); + pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); memblock_dump(&memblock.memory, "memory"); memblock_dump(&memblock.reserved, "reserved"); @@ -123,10 +123,10 @@ void __init memblock_analyze(void) { int i; - memblock.memory.size = 0; + memblock.memory_size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.regions[i].size; + memblock.memory_size += memblock.memory.regions[i].size; } static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) @@ -423,7 +423,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { - return memblock.memory.size; + return memblock.memory_size; } phys_addr_t memblock_end_of_DRAM(void) -- cgit v1.2.3 From bf23c51f1f49d3960f3cd8e3d2e7f943d9c41042 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:06 -0700 Subject: memblock: Move memblock arrays to static storage in memblock.c and make their size a variable This is in preparation for having resizable arrays. Note that we still allocate one more than needed, this is unchanged from the previous implementation. 
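The resulting layout reaches the storage through a pointer and bounds-checks against a runtime value, so a later patch can reassign both:

	struct memblock_type {
		unsigned long cnt;	/* number of regions */
		unsigned long max;	/* size of the allocated array */
		struct memblock_region *regions;
	};

and the fixed-size overflow test in memblock_add_region() becomes:

	if (type->cnt >= type->max)
		return -1;	/* a later patch will grow the array here */
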
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- mm/memblock.c | 10 +++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c9c7b0f344a5..150be938b910 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,7 @@ #include -#define MAX_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 struct memblock_region { phys_addr_t base; @@ -26,8 +26,9 @@ struct memblock_region { }; struct memblock_type { - unsigned long cnt; - struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; + unsigned long cnt; /* number of regions */ + unsigned long max; /* size of the allocated array */ + struct memblock_region *regions; }; struct memblock { diff --git a/mm/memblock.c b/mm/memblock.c index 5ae413e9afd8..3c474502d92b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -18,6 +18,8 @@ struct memblock memblock; static int memblock_debug; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static int __init early_memblock(char *p) { @@ -104,6 +106,12 @@ static void memblock_coalesce_regions(struct memblock_type *type, void __init memblock_init(void) { + /* Hookup the initial arrays */ + memblock.memory.regions = memblock_memory_init_regions; + memblock.memory.max = INIT_MEMBLOCK_REGIONS; + memblock.reserved.regions = memblock_reserved_init_regions; + memblock.reserved.max = INIT_MEMBLOCK_REGIONS; + /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ @@ -169,7 +177,7 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph if (coalesced) return coalesced; - if (type->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= type->max) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ -- cgit v1.2.3 From d2cd563ba82c424083b78e0ce97d68bfb04d1242 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:14 -0700 Subject: memblock: Add arch function to control coalescing of memblock memory regions Some archs such as ARM want to avoid coalescing accross things such as the lowmem/highmem boundary or similar. This provides the option to control it via an arch callback for which a weak default is provided which always allows coalescing. 
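As an illustration (hypothetical arch code, not part of this patch), an override refusing to merge across a fixed boundary could look like the following, where ARCH_LOWMEM_LIMIT is a made-up constant:

	#define ARCH_LOWMEM_LIMIT	0x30000000

	int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
					 phys_addr_t addr2, phys_addr_t size2)
	{
		phys_addr_t start = min(addr1, addr2);
		phys_addr_t end = max(addr1 + size1, addr2 + size2);

		/* refuse any merge that would straddle the boundary */
		return !(start < ARCH_LOWMEM_LIMIT && end > ARCH_LOWMEM_LIMIT);
	}

Being a non-weak definition, it simply takes precedence over the default in mm/memblock.c.
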
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 150be938b910..e5e8f9db3a84 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -72,6 +72,8 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); +extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2); /** * memblock_set_current_limit - Set the current allocation limit to allow diff --git a/mm/memblock.c b/mm/memblock.c index 0787790b1ce0..8715f09434df 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -241,6 +241,12 @@ static int memblock_double_array(struct memblock_type *type) return 0; } +extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2) +{ + return 1; +} + static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; @@ -262,6 +268,10 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); + /* Check if arch allows coalescing */ + if (adjacent != 0 && type == &memblock.memory && + !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize)) + break; if (adjacent > 0) { type->regions[i].base -= size; type->regions[i].size += size; @@ -274,7 +284,14 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph } } - if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + /* If we plugged a hole, we may want to also coalesce with the + * next region + */ + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) && + ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base, + type->regions[i].size, + type->regions[i+1].base, + type->regions[i+1].size)))) { memblock_coalesce_regions(type, i, i+1); coalesced++; } -- cgit v1.2.3 From c196f76fd5ece716ee3b7fa5dda3576961c0cecc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:16 -0700 Subject: memblock: NUMA allocate can now use early_pfn_map We now provide a default (weak) implementation of memblock_nid_range() which uses the early_pfn_map[] if CONFIG_ARCH_POPULATES_NODE_MAP is set. Sparc still needs to use its own method due to the way the pages can be scattered between nodes. This implementation is inefficient due to our main algorithm and callback construct wanting to work on an ascending addresses bases while early_pfn_map[] would rather work with nid's (it's unsorted at that stage). But it should work and we can look into improving it subsequently, possibly using arch compile options to chose a different algorithm alltogether. 
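To recap how the hook is consumed (a condensed restatement of memblock_alloc_nid_region() from earlier in this series; memblock_alloc_region() is internal to mm/memblock.c): the allocator walks a candidate range in chunks the arch reports as single-node, and only attempts chunks on the requested node:

	while (start < end) {
		int this_nid;
		phys_addr_t this_end = memblock_nid_range(start, end, &this_nid);

		if (this_nid == nid) {
			/* top-down allocation inside this chunk */
			phys_addr_t ret = memblock_alloc_region(start, this_end,
								size, align);
			if (ret != ~(phys_addr_t)0)
				return ret;
		}
		start = this_end;	/* jump to the next node chunk */
	}
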
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ mm/memblock.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e5e8f9db3a84..82b030244aa7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,9 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long __init memblock_free(phys_addr_t base, phys_addr_t size); extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); +/* The numa aware allocator is only available if + * CONFIG_ARCH_POPULATES_NODE_MAP is set + */ extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); diff --git a/mm/memblock.c b/mm/memblock.c index 468ff43a72b4..af7e4d9cf400 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include struct memblock memblock; @@ -451,11 +452,36 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) /* * Additional node-local allocators. Search for node memory is bottom up * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. + * within an memblock region is top-down. XXX I plan to fix that at some stage + * + * WARNING: Only available after early_node_map[] has been populated, + * on some architectures, that is after all the calls to add_active_range() + * have been done to populate it. */ phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + /* + * This code originates from sparc which really wants use to walk by addresses + * and returns the nid. This is not very convenient for early_pfn_map[] users + * as the map isn't sorted yet, and it really wants to be walked by nid. + * + * For now, I implement the inefficient method below which walks the early + * map multiple times. Eventually we may want to use an ARCH config option + * to implement a completely different method for both case. + */ + unsigned long start_pfn, end_pfn; + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + get_pfn_range_for_nid(i, &start_pfn, &end_pfn); + if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) + continue; + *nid = i; + return min(end, PFN_PHYS(end_pfn)); + } +#endif *nid = 0; return end; -- cgit v1.2.3 From 9d1e24928e6a0728d1c7c76818ccbd11b93e7ac9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:17 -0700 Subject: memblock: Separate memblock_alloc_nid() and memblock_alloc_try_nid() The former is now strict, it will fail if it cannot honor the allocation within the node, while the later implements the previous semantic which falls back to allocating anywhere. 
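A (hypothetical) caller now picks the variant according to how hard its node requirement is:

	phys_addr_t pa;

	/* strict: returns 0 if nothing suitable exists on this node */
	pa = memblock_alloc_nid(size, SMP_CACHE_BYTES, nid);
	if (!pa)
		panic("node %d has no memory for this table\n", nid);

	/* relaxed: transparently falls back to memblock_alloc() */
	pa = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, nid);

The sparc conversion below to memblock_alloc_try_nid() simply preserves the old fall-back semantics.
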
Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 4 ++-- include/linux/memblock.h | 6 +++++- mm/memblock.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 0883113624b9..dc584d26d597 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -820,7 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); + paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -840,7 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); + paddr = memblock_alloc_try_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot allocate bootmap for nid[%d]\n", nid); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 82b030244aa7..c8da03eb7ba3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,11 @@ extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, + int nid); +extern phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + int nid); + extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ diff --git a/mm/memblock.c b/mm/memblock.c index af7e4d9cf400..1802d97c7284 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -537,9 +537,23 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n return ret; } + return 0; +} + +phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + phys_addr_t res = memblock_alloc_nid(size, align, nid); + + if (res) + return res; return memblock_alloc(size, align); } + +/* + * Remaining API functions + */ + /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { -- cgit v1.2.3 From 5e63cf43af844ed30acc278b38b8c9bc51eba493 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:07:21 +1000 Subject: memblock: Expose some memblock bits for use by x86 This exposes memblock_debug and associated memblock_dbg() macro, along with memblock_can_resize so that x86 can use these when ported to use memblock Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 5 +++++ mm/memblock.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c8da03eb7ba3..eed0f9b8e526 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -39,6 +39,11 @@ struct memblock { }; extern struct memblock memblock; +extern int memblock_debug; +extern int memblock_can_resize; + +#define memblock_dbg(fmt, ...) 
\ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index cc15be29fd0a..5499ab162b9d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -22,7 +22,8 @@ struct memblock memblock; -static int memblock_debug, memblock_can_resize; +int memblock_debug; +int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -- cgit v1.2.3 From 37d8d4bf489e39eedc9537f8616fe87879b13cb0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:20:58 +1000 Subject: memblock: Export MEMBLOCK_ERROR will used by x86 memblock_x86_find_in_range_node and nobootmem replacement Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 ++- mm/memblock.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index eed0f9b8e526..1a9c29cc92fa 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,8 @@ #include -#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 +#define MEMBLOCK_ERROR (~(phys_addr_t)0) struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index c3703abf057e..85cfa1d3ab28 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -27,8 +27,6 @@ int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -#define MEMBLOCK_ERROR (~(phys_addr_t)0) - /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) { -- cgit v1.2.3 From 25818f0f288cd5333ba5a90ad6dde3def4c4ff58 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 15:25:10 +1000 Subject: memblock: Make MEMBLOCK_ERROR be 0 And ensure we don't hand out 0 as a valid allocation. We put the low limit at PAGE_SIZE arbitrarily. 
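With 0 as the error cookie, failure tests reduce to plain truth tests; e.g. for a (hypothetical) caller of the non-panicking allocator:

	phys_addr_t base = __memblock_alloc_base(size, align,
						 MEMBLOCK_ALLOC_ACCESSIBLE);

	if (base == MEMBLOCK_ERROR)	/* equivalently: if (!base) */
		printk(KERN_WARNING "early allocation of %llu bytes failed\n",
		       (unsigned long long)size);

The PAGE_SIZE floor added to memblock_find_region() is what guarantees a successful allocation can never legitimately return 0.
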
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 +- mm/memblock.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1a9c29cc92fa..dfa64494ced1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -19,7 +19,7 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR (~(phys_addr_t)0) +#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index 85cfa1d3ab28..cb520df2a414 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -105,6 +105,12 @@ static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t en phys_addr_t base, res_base; long j; + /* Prevent allocations returning 0 as it's also used to + * indicate an allocation failure + */ + if (start == 0) + start = PAGE_SIZE; + base = memblock_align_down((end - size), align); while (start <= base) { j = memblock_overlaps_region(&memblock.reserved, base, size); -- cgit v1.2.3 From f0b37fad9a63217c39997b2d2b31f44e3d8be727 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:28:21 +1000 Subject: memblock: Protect memblock.h with CONFIG_HAVE_MEMBLOCK This should make it easier to catch/debug incorrect use when the CONFIG_ option isn't set. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index dfa64494ced1..c24b27849096 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,7 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. * @@ -148,6 +149,8 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#endif /* CONFIG_HAVE_MEMBLOCK */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 10d0643988e976360eb3497dcafb55b393b8e480 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:43:02 +1000 Subject: memblock: Option for the architecture to put memblock into the .init section Arch code can define ARCH_DISCARD_MEMBLOCK in asm/memblock.h, which in turns causes memblock code and data to go respectively into the .init and .initdata sections. This will be used by the x86 architecture. If ARCH_DISCARD_MEMBLOCK is defined, the debugfs files to inspect the memblock arrays after boot are not created. 
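Opting in is a one-liner in the architecture's asm/memblock.h, e.g. what x86 is expected to do:

	#define ARCH_DISCARD_MEMBLOCK

With it defined, __init_memblock and __initdata_memblock expand to __init and __initdata, so the annotated memblock functions and the two static region arrays are freed together with the rest of the .init sections at the end of boot; without it they expand to nothing and everything stays resident.
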
Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 8 ++++++++ mm/memblock.c | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c24b27849096..3978e6a8e824 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -149,6 +149,14 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#ifdef ARCH_DISCARD_MEMBLOCK +#define __init_memblock __init +#define __initdata_memblock __initdata +#else +#define __init_memblock +#define __initdata_memblock +#endif + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/mm/memblock.c b/mm/memblock.c index cb520df2a414..a17faea37d47 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,12 +20,12 @@ #include #include -struct memblock memblock; +struct memblock memblock __initdata_memblock; -int memblock_debug; -int memblock_can_resize; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +int memblock_debug __initdata_memblock; +int memblock_can_resize __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -42,23 +42,23 @@ static inline const char *memblock_type_name(struct memblock_type *type) * Address comparison utilities */ -static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, +static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, +static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) @@ -69,7 +69,7 @@ static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, return 0; } -static long memblock_regions_adjacent(struct memblock_type *type, +static long __init_memblock memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { phys_addr_t base1 = type->regions[r1].base; @@ -80,7 +80,7 @@ static long memblock_regions_adjacent(struct memblock_type *type, return memblock_addrs_adjacent(base1, size1, base2, size2); } -long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; @@ -162,7 +162,7 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; 
} -static void memblock_remove_region(struct memblock_type *type, unsigned long r) +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; @@ -174,7 +174,7 @@ static void memblock_remove_region(struct memblock_type *type, unsigned long r) } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_type *type, +static void __init_memblock memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { type->regions[r1].size += type->regions[r2].size; @@ -184,7 +184,7 @@ static void memblock_coalesce_regions(struct memblock_type *type, /* Defined below but needed now */ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); -static int memblock_double_array(struct memblock_type *type) +static int __init_memblock memblock_double_array(struct memblock_type *type) { struct memblock_region *new_array, *old_array; phys_addr_t old_size, new_size, addr; @@ -255,13 +255,13 @@ static int memblock_double_array(struct memblock_type *type) return 0; } -extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, +extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, phys_addr_t addr2, phys_addr_t size2) { return 1; } -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -348,13 +348,13 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; } -long memblock_add(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { phys_addr_t rgnbegin, rgnend; phys_addr_t end = base + size; @@ -402,7 +402,7 @@ static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } @@ -568,7 +568,7 @@ phys_addr_t __init memblock_phys_mem_size(void) return memblock.memory_size; } -phys_addr_t memblock_end_of_DRAM(void) +phys_addr_t __init_memblock memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -655,7 +655,7 @@ int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } @@ -666,7 +666,7 @@ void __init memblock_set_current_limit(phys_addr_t limit) memblock.current_limit = limit; } -static void memblock_dump(struct memblock_type *region, char *name) +static void __init_memblock memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -682,7 +682,7 @@ static void memblock_dump(struct memblock_type *region, char 
*name) } } -void memblock_dump_all(void) +void __init_memblock memblock_dump_all(void) { if (!memblock_debug) return; @@ -748,7 +748,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { -- cgit v1.2.3 From 5303b68f57c227c27193a14e57dd12be27cd670f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:38:40 +1000 Subject: memblock: Add memblock_find_in_range() This is a wrapper for memblock_find_base() using slightly different arguments (start,end instead of start,size for example) in order to make it easier to convert existing arch/x86 code. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3978e6a8e824..4df09bdcae42 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,8 @@ extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); + extern void __init memblock_init(void); extern void __init memblock_analyze(void); extern long memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index a17faea37d47..b7ab10a2ef46 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -162,6 +162,14 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; } +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +{ + return memblock_find_base(size, align, start, end); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From 18cb2aef91b37dbce2bec2f39bb1dddd0e9dd838 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 03:26:23 +0900 Subject: percpu: handle __percpu notations in UP accessors UP accessors didn't take care of __percpu notations leading to a lot of spurious sparse warnings on UP configurations. Fix it. Signed-off-by: Namhyung Kim Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 15 ++++++++++----- include/linux/percpu.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9890d8..08923b684768 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void); #else /* ! 
SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &(var))) -#define __get_cpu_var(var) (var) -#define __raw_get_cpu_var(var) (var) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b8b9084527b1..49466b13c5c6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,7 +149,7 @@ extern void __init percpu_init_late(void); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) /* can't distinguish from other static vars, always false */ static inline bool is_kernel_percpu_address(unsigned long addr) -- cgit v1.2.3 From 1b5ad24878b7e5a543b98c5d2f8c0d8c0dd3088f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 14:29:22 +0200 Subject: slub: add missing __percpu markup in mm/slub_def.h kmem_cache->cpu_slab is a percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 6447a723ecb1..5ec4bc0e45aa 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -68,7 +68,7 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { - struct kmem_cache_cpu *cpu_slab; + struct kmem_cache_cpu __percpu *cpu_slab; /* Used for retriving partial slabs etc */ unsigned long flags; int size; /* The size of an object including meta data */ -- cgit v1.2.3 From 73e4008ddddc84d5f2499c17012b340a0dae153e Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Fri, 6 Aug 2010 23:03:06 +0400 Subject: HID: allow resizing and replacing report descriptors Update hid_driver's report_fixup prototype to allow changing report descriptor size and/or returning completely different report descriptor. Update existing usage accordingly. This is to give more freedom in descriptor fixup and to allow having a whole fixed descriptor in the code for the sake of readability. 
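For example, a driver whose device ships an unsalvageable descriptor can now hand back a complete replacement (hypothetical driver; the descriptor bytes are made up):

	static __u8 foo_fixed_rdesc[] = {
		0x05, 0x01,		/* Usage Page (Generic Desktop) */
		/* ... full corrected descriptor ... */
	};

	static __u8 *foo_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				      unsigned int *rsize)
	{
		if (*rsize >= 2 && rdesc[0] == 0x05 && rdesc[1] == 0x0c) {
			/* known-broken variant: use our own copy */
			*rsize = sizeof(foo_fixed_rdesc);
			return foo_fixed_rdesc;
		}
		return rdesc;
	}

Since hid_parse_report() kmemdup()s whatever pointer report_fixup() returns, handing back a static buffer is safe.
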
Signed-off-by: Nikolai Kondrashov Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 7 ++++--- drivers/hid/hid-cherry.c | 7 ++++--- drivers/hid/hid-core.c | 2 +- drivers/hid/hid-cypress.c | 9 +++++---- drivers/hid/hid-elecom.c | 7 ++++--- drivers/hid/hid-kye.c | 7 ++++--- drivers/hid/hid-lg.c | 9 +++++---- drivers/hid/hid-microsoft.c | 7 ++++--- drivers/hid/hid-monterey.c | 7 ++++--- drivers/hid/hid-ortek.c | 7 ++++--- drivers/hid/hid-petalynx.c | 7 ++++--- drivers/hid/hid-prodikeys.c | 7 ++++--- drivers/hid/hid-samsung.c | 20 +++++++++++--------- drivers/hid/hid-sony.c | 7 ++++--- drivers/hid/hid-sunplus.c | 7 ++++--- drivers/hid/hid-zydacron.c | 7 ++++--- include/linux/hid.h | 4 ++-- 17 files changed, 72 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index bba05d0a8980..eaeca564a8d3 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -246,17 +246,18 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field, /* * MacBook JIS keyboard has wrong logical maximum */ -static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 && + if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report " "descriptor\n"); rdesc[53] = rdesc[59] = 0xe7; } + return rdesc; } static void apple_setup_input(struct input_dev *input) diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c index 24663a8717b1..e880086c2311 100644 --- a/drivers/hid/hid-cherry.c +++ b/drivers/hid/hid-cherry.c @@ -26,15 +26,16 @@ * Cherry Cymotion keyboard have an invalid HID report descriptor, * that needs fixing before we can parse it. 
*/ -static void ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { + if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { dev_info(&hdev->dev, "fixing up Cherry Cymotion report " "descriptor\n"); rdesc[11] = rdesc[16] = 0xff; rdesc[12] = rdesc[17] = 0x03; } + return rdesc; } #define ch_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e635199a0cd2..ec20e83cbd61 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -651,7 +651,7 @@ int hid_parse_report(struct hid_device *device, __u8 *start, }; if (device->driver->report_fixup) - device->driver->report_fixup(device, start, size); + start = device->driver->report_fixup(device, start, &size); device->rdesc = kmemdup(start, size, GFP_KERNEL); if (device->rdesc == NULL) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 998b6f443d7d..4cd0e2345991 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -31,16 +31,16 @@ * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return; + return rdesc; - for (i = 0; i < rsize - 4; i++) + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { __u8 tmp; @@ -50,6 +50,7 @@ static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[i + 3] = rdesc[i + 1]; rdesc[i + 1] = tmp; } + return rdesc; } static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 7a40878f46b4..6e31f305397d 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -20,14 +20,15 @@ #include "hid-ids.h" -static void elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { + if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { dev_info(&hdev->dev, "Fixing up Elecom BM084 " "report descriptor.\n"); rdesc[47] = 0x00; } + return rdesc; } static const struct hid_device_id elecom_devices[] = { diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index f8871712b7b5..817247ee006c 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -23,10 +23,10 @@ * - report size 8 count 1 must be size 1 count 8 for button bitfield * - change the button usage range to 4-7 for the extra buttons */ -static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 74 && + if (*rsize >= 74 && rdesc[61] == 0x05 && rdesc[62] == 0x08 && rdesc[63] == 0x19 && rdesc[64] == 0x08 && rdesc[65] == 0x29 && rdesc[66] == 0x0f && @@ -40,6 +40,7 @@ static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[72] = 0x01; rdesc[74] = 0x08; } + return rdesc; } static const struct hid_device_id kye_devices[] = { diff --git a/drivers/hid/hid-lg.c 
b/drivers/hid/hid-lg.c index f6433d8050a9..68c0b68856c7 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -41,25 +41,26 @@ * above the logical maximum described in descriptor. This extends * the original value of 0x28c of logical maximum to 0x104d */ -static void lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & LG_RDESC) && rsize >= 90 && rdesc[83] == 0x26 && + if ((quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { dev_info(&hdev->dev, "fixing up Logitech keyboard report " "descriptor\n"); rdesc[84] = rdesc[89] = 0x4d; rdesc[85] = rdesc[90] = 0x10; } - if ((quirks & LG_RDESC_REL_ABS) && rsize >= 50 && + if ((quirks & LG_RDESC_REL_ABS) && *rsize >= 50 && rdesc[32] == 0x81 && rdesc[33] == 0x06 && rdesc[49] == 0x81 && rdesc[50] == 0x06) { dev_info(&hdev->dev, "fixing up rel/abs in Logitech " "report descriptor\n"); rdesc[33] = rdesc[50] = 0x02; } + return rdesc; } #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 359cc447c6c6..dc618c33d0a2 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -33,18 +33,19 @@ * Microsoft Wireless Desktop Receiver (Model 1028) has * 'Usage Min/Max' where it ought to have 'Physical Min/Max' */ -static void ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & MS_RDESC) && rsize == 571 && rdesc[557] == 0x19 && + if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && rdesc[559] == 0x29) { dev_info(&hdev->dev, "fixing up Microsoft Wireless Receiver " "Model 1028 report descriptor\n"); rdesc[557] = 0x35; rdesc[559] = 0x45; } + return rdesc; } #define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c index 2cd05aa244b9..c95c31e2d869 100644 --- a/drivers/hid/hid-monterey.c +++ b/drivers/hid/hid-monterey.c @@ -22,14 +22,15 @@ #include "hid-ids.h" -static void mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { + if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { dev_info(&hdev->dev, "fixing up button/consumer in HID report " "descriptor\n"); rdesc[30] = 0x0c; } + return rdesc; } #define mr_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-ortek.c b/drivers/hid/hid-ortek.c index aa9a960f73a4..2e79716dca31 100644 --- a/drivers/hid/hid-ortek.c +++ b/drivers/hid/hid-ortek.c @@ -19,14 +19,15 @@ #include "hid-ids.h" -static void ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { + if (*rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { dev_info(&hdev->dev, "Fixing up Ortek WKB-2000 " "report descriptor.\n"); rdesc[55] = 0x92; } + return rdesc; } static const struct hid_device_id ortek_devices[] = { diff 
--git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c index 500fbd0652dc..308d6ae48a3e 100644 --- a/drivers/hid/hid-petalynx.c +++ b/drivers/hid/hid-petalynx.c @@ -23,10 +23,10 @@ #include "hid-ids.h" /* Petalynx Maxter Remote has maximum for consumer page set too low */ -static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && + if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && rdesc[41] == 0x00 && rdesc[59] == 0x26 && rdesc[60] == 0xf9 && rdesc[61] == 0x00) { dev_info(&hdev->dev, "fixing up Petalynx Maxter Remote report " @@ -34,6 +34,7 @@ static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[60] = 0xfa; rdesc[40] = 0xfa; } + return rdesc; } #define pl_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 845f428b8090..48eab84f53b5 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -740,10 +740,10 @@ int pcmidi_snd_terminate(struct pcmidi_snd *pm) /* * PC-MIDI report descriptor for report id is wrong. */ -static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 178 && + if (*rsize == 178 && rdesc[111] == 0x06 && rdesc[112] == 0x00 && rdesc[113] == 0xff) { dev_info(&hdev->dev, "fixing up pc-midi keyboard report " @@ -751,6 +751,7 @@ static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[144] = 0x18; /* report 4: was 0x10 report count */ } + return rdesc; } static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index bda0fd60c98d..35894444e000 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -61,10 +61,10 @@ static inline void samsung_irda_dev_trace(struct hid_device *hdev, "descriptor\n", rsize); } -static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && + if (*rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && rdesc[177] == 0x75 && rdesc[178] == 0x30 && rdesc[179] == 0x95 && rdesc[180] == 0x01 && rdesc[182] == 0x40) { @@ -74,24 +74,25 @@ static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[180] = 0x06; rdesc[182] = 0x42; } else - if (rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && + if (*rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && rdesc[194] == 0x25 && rdesc[195] == 0x12) { samsung_irda_dev_trace(hdev, 203); rdesc[193] = 0x1; rdesc[195] = 0xf; } else - if (rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && + if (*rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && rdesc[126] == 0x25 && rdesc[127] == 0x11) { samsung_irda_dev_trace(hdev, 135); rdesc[125] = 0x1; rdesc[127] = 0xe; } else - if (rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && + if (*rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && rdesc[162] == 0x25 && rdesc[163] == 0x01) { samsung_irda_dev_trace(hdev, 171); rdesc[161] = 0x1; rdesc[163] = 0x3; } + return rdesc; } #define samsung_kbd_mouse_map_key_clear(c) \ @@ -130,11 +131,12 @@ 
static int samsung_kbd_mouse_input_mapping(struct hid_device *hdev, return 1; } -static void samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { if (USB_DEVICE_ID_SAMSUNG_IR_REMOTE == hdev->product) - samsung_irda_report_fixup(hdev, rdesc, rsize); + rdesc = samsung_irda_report_fixup(hdev, rdesc, rsize); + return rdesc; } static int samsung_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 402d5574b574..9fa034915185 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -31,17 +31,18 @@ struct sony_sc { }; /* Sony Vaio VGX has wrongly mouse pointer declared as constant */ -static void sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct sony_sc *sc = hid_get_drvdata(hdev); if ((sc->quirks & VAIO_RDESC_CONSTANT) && - rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { + *rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { dev_info(&hdev->dev, "Fixing up Sony Vaio VGX report " "descriptor\n"); rdesc[55] = 0x06; } + return rdesc; } /* diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c index 438107d9f1b2..164ed568f6cf 100644 --- a/drivers/hid/hid-sunplus.c +++ b/drivers/hid/hid-sunplus.c @@ -22,16 +22,17 @@ #include "hid-ids.h" -static void sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && + if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { dev_info(&hdev->dev, "fixing up Sunplus Wireless Desktop " "report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; rdesc[106] = rdesc[111] = 0x21; } + return rdesc; } #define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c index 9e8d35a203e4..aac1f9273149 100644 --- a/drivers/hid/hid-zydacron.c +++ b/drivers/hid/hid-zydacron.c @@ -27,10 +27,10 @@ struct zc_device { * Zydacron remote control has an invalid HID report descriptor, * that needs fixing before we can parse it. 
*/ -static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 253 && + if (*rsize >= 253 && rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff && rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff && rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) { @@ -40,6 +40,7 @@ static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c; rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00; } + return rdesc; } #define zc_map_key_clear(c) \ diff --git a/include/linux/hid.h b/include/linux/hid.h index 42a0f1d11365..0a34fb071379 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -626,8 +626,8 @@ struct hid_driver { int (*event)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); - void (*report_fixup)(struct hid_device *hdev, __u8 *buf, - unsigned int size); + __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + unsigned int *size); int (*input_mapping)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, -- cgit v1.2.3 From 92298e668372f2f6c8a79fb272f13d65161a4876 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:22:17 +1000 Subject: PCI: provide stub pci_domain_nr function for !CONFIG_PCI configs Allows the new PCI domain aware DRM code to compile on m68k. Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index b1d17956a153..c8d95e369ff4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline int pci_domain_nr(struct pci_bus *bus) +{ return 0; } + #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) #define dev_num_vf(d) (0) -- cgit v1.2.3 From c84e032e145775032fa9078b55e6333dd866603b Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 14 Aug 2010 09:43:22 -0700 Subject: include/video/vga.h: update web address. The below updates a broken web address to one that is (hopefully) the new correct address. Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- include/video/vga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index b49a5120ca2d..2b8691f7d256 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -5,7 +5,7 @@ * * Copyright history from vga16fb.c: * Copyright 1999 Ben Pfaff and Petr Vandrovec - * Based on VGA info at http://www.goodnet.com/~tinara/FreeVGA/home.htm + * Based on VGA info at http://www.osdever.net/FreeVGA/home.htm * Based on VESA framebuffer (c) 1998 Gerd Knorr * * This file is subject to the terms and conditions of the GNU General -- cgit v1.2.3 From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implements basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack.
That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 5 +++++ net/mac80211/iface.c | 4 ++++ net/mac80211/main.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea90..3f1e03b521ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. 
+ * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 65e0ed6c2975..79d56454484a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -870,6 +870,11 @@ struct ieee80211_local { struct dentry *keys; } debugfs; #endif + + /* dummy netdev for use w/ NAPI */ + struct net_device napi_dev; + + struct napi_struct napi; }; static inline struct ieee80211_sub_if_data * diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ebbe264e2b0b..c1008a9d7bfb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -187,6 +187,8 @@ static int ieee80211_open(struct net_device *dev) res = drv_start(local); if (res) goto err_del_bss; + if (local->ops->napi_poll) + napi_enable(&local->napi); /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); @@ -519,6 +521,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { + if (local->ops->napi_poll) + napi_disable(&local->napi); ieee80211_clear_tx_pending(local); ieee80211_stop_device(local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 798a91b100cc..1ed956c9cb8b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -390,6 +390,30 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +static int ieee80211_napi_poll(struct napi_struct *napi, int budget) +{ + struct ieee80211_local *local = + container_of(napi, struct ieee80211_local, napi); + + return local->ops->napi_poll(&local->hw, budget); +} + +void ieee80211_napi_schedule(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_schedule(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_schedule); + +void ieee80211_napi_complete(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_complete(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_complete); + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -494,6 +518,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); + /* init dummy netdev for use w/ NAPI */ + init_dummy_netdev(&local->napi_dev); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -683,6 +710,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, + local->hw.napi_weight); + return 0; #ifdef CONFIG_INET -- cgit v1.2.3 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ibss.c | 8 +++++-- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/iface.c | 53 +++++++++++++++++++++++++++++++++++++++------- net/mac80211/mlme.c | 17 +++++++++++++-- net/mac80211/scan.c | 2 ++ net/mac80211/work.c | 8 +++---- 7 files changed, 81 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521ec..3a3c26f647b7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c691780725a7..32af97108425 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); sdata->u.ibss.ssid_len = params->ssid_len; + mutex_unlock(&sdata->u.ibss.mtx); + + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); ieee80211_queue_work(&sdata->local->hw, &sdata->work); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } @@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); + mutex_lock(&local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b44e03a02da9..98e783c6a363 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -497,6 +497,9 @@ struct ieee80211_sub_if_data { */ bool ht_opmode_valid; + /* to detect idle changes */ + bool old_idle; + /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c1008a9d7bfb..9459aeee0ddc 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -309,7 +309,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); local->open_count++; if (hw_reconf_flags) { @@ -516,7 +518,9 @@ static int ieee80211_stop(struct net_device *dev) sdata->bss = NULL; + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + 
mutex_unlock(&local->mtx); ieee80211_recalc_ps(local, -1); @@ -1199,28 +1203,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; + bool working = false, scanning = false; + struct ieee80211_work *wk; - if (!list_empty(&local->work_list)) - return ieee80211_idle_off(local, "working"); - - if (local->scanning) - return ieee80211_idle_off(local, "scanning"); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON(debug_locks && !lockdep_rtnl_is_held() && + !lockdep_is_held(&local->iflist_mtx)); +#endif + lockdep_assert_held(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata)) { + sdata->vif.bss_conf.idle = true; continue; + } + + sdata->old_idle = sdata->vif.bss_conf.idle; + /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) + !sdata->u.mgd.associated) { + sdata->vif.bss_conf.idle = true; continue; + } /* do not count unused IBSS interfaces */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) + !sdata->u.ibss.ssid_len) { + sdata->vif.bss_conf.idle = true; continue; + } /* count everything else */ count++; } + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } + + if (local->scan_sdata) { + scanning = true; + local->scan_sdata->vif.bss_conf.idle = false; + } + + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->old_idle == sdata->vif.bss_conf.idle) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + } + + if (working) + return ieee80211_idle_off(local, "working"); + if (scanning) + return ieee80211_idle_off(local, "scanning"); if (!count) return ieee80211_idle_on(local); else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 17e9257a61d8..82e7cec5179c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1103,8 +1103,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -1173,7 +1176,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, sdata->name, bssid, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DEAUTH; } @@ -1203,7 +1208,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DISASSOC; } @@ -1840,8 +1847,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. 
@@ -2319,7 +2328,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (assoc_bss) sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -2357,7 +2368,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, cookie, !req->local_state_change); sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f31f549733b1..31f233f7f51a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -304,7 +304,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_offchannel_return(local, true); done: + mutex_lock(&local->mtx); ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index b98af64f5862..ae344d1ba056 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -888,10 +888,10 @@ static void ieee80211_work_work(struct work_struct *work) while ((skb = skb_dequeue(&local->work_skb_queue))) ieee80211_work_rx_queued_mgmt(local, skb); - ieee80211_recalc_idle(local); - mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { bool started = wk->started; @@ -1001,10 +1001,10 @@ static void ieee80211_work_work(struct work_struct *work) &local->scan_work, round_jiffies_relative(0)); - mutex_unlock(&local->mtx); - ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + list_for_each_entry_safe(wk, tmp, &free_work, list) { wk->done(wk, NULL); list_del(&wk->list); -- cgit v1.2.3 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ net/mac80211/ht.c | 28 ++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mlme.c | 3 +++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b7..871ed1de736a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. 
+ */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9d101fb33861..11f74f5f7b2f 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, return 0; } + +void ieee80211_request_smps_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.request_smps_work); + + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); +} + +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) + smps_mode = IEEE80211_SMPS_AUTOMATIC; + + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.request_smps_work); +} +/* this might change ... don't want non-open drivers using it */ +EXPORT_SYMBOL_GPL(ieee80211_request_smps); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 98e783c6a363..1bf05bfd149d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -343,7 +343,10 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ - ap_smps; /* smps mode AP thinks we're in */ + ap_smps, /* smps mode AP thinks we're in */ + driver_smps_mode; /* smps mode request */ + + struct work_struct request_smps_work; unsigned int flags; @@ -1113,6 +1116,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ieee80211_request_smps_work(struct work_struct *work); void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82e7cec5179c..38996a44aa8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1926,6 +1926,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1961,6 +1963,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, -- cgit v1.2.3 From 04600794958f1833f5571c6cde40f260ab557f55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:45:15 +0200 Subject: cfg80211: support sysfs namespaces Enable using network namespaces with wireless devices even when sysfs is enabled using the same infrastructure that was built for netdevs. Signed-off-by: Johannes Berg Acked-by: "Eric W. Biederman" Signed-off-by: John W. 
Linville --- include/linux/netdevice.h | 2 ++ net/core/net-sysfs.c | 3 ++- net/wireless/core.c | 7 ++++++- net/wireless/sysfs.c | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..a4b14fd81c6a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2171,6 +2171,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern struct kobj_ns_type_operations net_ns_type_operations; + extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index af4dfbadf2a0..7d748542d97e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -789,12 +789,13 @@ static const void *net_netlink_ns(struct sock *sk) return sock_net(sk); } -static struct kobj_ns_type_operations net_ns_type_operations = { +struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, .current_ns = net_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, }; +EXPORT_SYMBOL_GPL(net_ns_type_operations); static void net_kobj_ns_exit(struct net *net) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 541e2fff5e9c..c70909c3eae4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); wdev->netdev->features |= NETIF_F_NETNS_LOCAL; } + + return err; } wiphy_net_set(&rdev->wiphy, net); - return err; + err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev)); + WARN_ON(err); + + return 0; } static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9f2cef3e0ca0..74a9e3cce452 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev) return ret; } +static const void *wiphy_namespace(struct device *d) +{ + struct wiphy *wiphy = container_of(d, struct wiphy, dev); + + return wiphy_net(wiphy); +} + struct class ieee80211_class = { .name = "ieee80211", .owner = THIS_MODULE, @@ -120,6 +127,8 @@ struct class ieee80211_class = { #endif .suspend = wiphy_suspend, .resume = wiphy_resume, + .ns_type = &net_ns_type_operations, + .namespace = wiphy_namespace, }; int wiphy_sysfs_init(void) -- cgit v1.2.3 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. 
Linville --- drivers/net/wireless/at76c50x-usb.c | 7 ++-- drivers/net/wireless/ath/ar9170/main.c | 31 ++++++++------- drivers/net/wireless/ath/ath5k/base.c | 9 +++-- drivers/net/wireless/ath/ath5k/pcu.c | 19 ++++----- drivers/net/wireless/ath/ath9k/common.c | 36 ++++++++++-------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++- drivers/net/wireless/ath/ath9k/main.c | 5 ++- drivers/net/wireless/b43/main.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn.c | 4 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 2 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 24 ++++++------ drivers/net/wireless/iwlwifi/iwl3945-base.c | 43 ++++++++++++--------- drivers/net/wireless/p54/main.c | 9 +++-- drivers/net/wireless/p54/txrx.c | 17 +++++---- drivers/net/wireless/rt2x00/rt2500usb.c | 4 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 17 ++++----- drivers/net/wireless/wl12xx/wl1251_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1251_tx.c | 4 +- drivers/net/wireless/wl12xx/wl1271_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1271_tx.c | 4 +- include/net/mac80211.h | 18 +-------- net/mac80211/cfg.c | 44 ++++++--------------- net/mac80211/debugfs_key.c | 55 +++++++++++---------------- net/mac80211/driver-trace.h | 4 +- net/mac80211/key.c | 25 ++++++------ net/mac80211/key.h | 4 +- net/mac80211/rx.c | 18 +++++---- net/mac80211/tx.c | 22 ++++++----- net/mac80211/wep.c | 2 +- net/mac80211/wpa.c | 6 +-- 31 files changed, 236 insertions(+), 260 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index d5140a87f073..a267bf55574c 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2061,11 +2061,12 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, int i; - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " + at76_dbg(DBG_MAC80211, "%s(): cmd %d key->cipher %d key->keyidx %d " "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); + __func__, cmd, key->cipher, key->keyidx, key->keylen); - if (key->alg != ALG_WEP) + if ((key->cipher != WLAN_CIPHER_SUITE_WEP40) && + (key->cipher != WLAN_CIPHER_SUITE_WEP104)) return -EOPNOTSUPP; key->hw_key_idx = key->keyidx; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index c67b05f3bcbd..29d2253ad7e4 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1190,14 +1190,13 @@ static int ar9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb) if (info->control.hw_key) { icv = info->control.hw_key->icv_len; - switch (info->control.hw_key->alg) { - case ALG_WEP: + switch (info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: keytype = AR9170_TX_MAC_ENCR_RC4; break; - case ALG_TKIP: - keytype = AR9170_TX_MAC_ENCR_RC4; - break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: keytype = AR9170_TX_MAC_ENCR_AES; break; default: @@ -1778,17 +1777,17 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if ((!ar->vif) || (ar->disable_offload)) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - ktype = AR9170_ENC_ALG_WEP64; - else - ktype = AR9170_ENC_ALG_WEP128; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + ktype = AR9170_ENC_ALG_WEP64; + break; + case WLAN_CIPHER_SUITE_WEP104: + ktype = AR9170_ENC_ALG_WEP128; break; - 
case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; break; default: @@ -1827,7 +1826,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) @@ -1864,7 +1863,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 373dcfec689c..a729b8774670 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -3297,11 +3297,12 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (sc->opmode == NL80211_IFTYPE_AP) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (sc->ah->ah_aes_support) break; diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 86fdb6ddfaaa..af273358c7cb 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -695,21 +695,18 @@ int ath5k_hw_reset_key(struct ath5k_hw *ah, u16 entry) static int ath5k_keycache_type(const struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: return AR5K_KEYTABLE_TYPE_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return AR5K_KEYTABLE_TYPE_CCM; - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return AR5K_KEYTABLE_TYPE_40; - else if (key->keylen == WLAN_KEY_LEN_WEP104) - return AR5K_KEYTABLE_TYPE_104; - return -EINVAL; + case WLAN_CIPHER_SUITE_WEP40: + return AR5K_KEYTABLE_TYPE_40; + case WLAN_CIPHER_SUITE_WEP104: + return AR5K_KEYTABLE_TYPE_104; default: return -EINVAL; } - return -EINVAL; } /* @@ -728,7 +725,7 @@ int ath5k_hw_set_key(struct ath5k_hw *ah, u16 entry, bool is_tkip; const u8 *key_ptr; - is_tkip = (key->alg == ALG_TKIP); + is_tkip = (key->cipher == WLAN_CIPHER_SUITE_TKIP); /* * key->keylen comes in from mac80211 in bytes. 
diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index c86f7d3593ab..3100c87a4fcd 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -46,12 +46,17 @@ int ath9k_cmn_get_hw_crypto_keytype(struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); if (tx_info->control.hw_key) { - if (tx_info->control.hw_key->alg == ALG_WEP) + switch (tx_info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ATH9K_KEY_TYPE_WEP; - else if (tx_info->control.hw_key->alg == ALG_TKIP) + case WLAN_CIPHER_SUITE_TKIP: return ATH9K_KEY_TYPE_TKIP; - else if (tx_info->control.hw_key->alg == ALG_CCMP) + case WLAN_CIPHER_SUITE_CCMP: return ATH9K_KEY_TYPE_AES; + default: + break; + } } return ATH9K_KEY_TYPE_CLEAR; @@ -212,11 +217,11 @@ static int ath_reserve_key_cache_slot_tkip(struct ath_common *common) } static int ath_reserve_key_cache_slot(struct ath_common *common, - enum ieee80211_key_alg alg) + u32 cipher) { int i; - if (alg == ALG_TKIP) + if (cipher == WLAN_CIPHER_SUITE_TKIP) return ath_reserve_key_cache_slot_tkip(common); /* First, try to find slots that would not be available for TKIP. */ @@ -293,14 +298,15 @@ int ath9k_cmn_key_config(struct ath_common *common, memset(&hk, 0, sizeof(hk)); - switch (key->alg) { - case ALG_WEP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: hk.kv_type = ATH9K_CIPHER_WEP; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: hk.kv_type = ATH9K_CIPHER_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: hk.kv_type = ATH9K_CIPHER_AES_CCM; break; default: @@ -316,7 +322,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, vif->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; case NL80211_IFTYPE_ADHOC: if (!sta) { @@ -326,7 +332,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, sta->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; default: idx = key->keyidx; @@ -348,13 +354,13 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EOPNOTSUPP; mac = sta->addr; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); } if (idx < 0) return -ENOSPC; /* no free key cache entries */ - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) ret = ath_setkey_tkip(common, idx, key->key, &hk, mac, vif->type == NL80211_IFTYPE_AP); else @@ -364,7 +370,7 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EIO; set_bit(idx, common->keymap); - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { set_bit(idx + 64, common->keymap); if (common->splitmic) { set_bit(idx + 32, common->keymap); @@ -389,7 +395,7 @@ void ath9k_cmn_key_delete(struct ath_common *common, return; clear_bit(key->hw_key_idx, common->keymap); - if (key->alg != ALG_TKIP) + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) return; clear_bit(key->hw_key_idx + 64, common->keymap); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 7d09b4b17bbd..4e345be62435 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1585,9 +1585,10 @@ static int 
ath9k_htc_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (priv->ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (priv->ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a3b0ea90439d..1165f909ef04 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1782,9 +1782,10 @@ static int ath9k_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (sc->sc_ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (sc->sc_ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index a3e2f2bfe3a7..a1186525c70d 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3759,17 +3759,17 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } err = -EINVAL; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - algorithm = B43_SEC_ALGO_WEP40; - else - algorithm = B43_SEC_ALGO_WEP104; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + algorithm = B43_SEC_ALGO_WEP40; + break; + case WLAN_CIPHER_SUITE_WEP104: + algorithm = B43_SEC_ALGO_WEP104; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: algorithm = B43_SEC_ALGO_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: algorithm = B43_SEC_ALGO_AES; break; default: diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c index 69155aa448fb..3fc982e87921 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c @@ -470,8 +470,8 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, { struct ieee80211_key_conf *keyconf = info->control.hw_key; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); if (info->flags & IEEE80211_TX_CTL_AMPDU) @@ -479,20 +479,20 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: tx_cmd->sec_ctl = TX_CMD_SEC_TKIP; ieee80211_get_tkip_key(keyconf, skb_frag, IEEE80211_TKIP_P2_KEY, tx_cmd->key); IWL_DEBUG_TX(priv, "tx_cmd with tkip hwcrypto\n"); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: tx_cmd->sec_ctl |= (TX_CMD_SEC_WEP | (keyconf->keyidx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT); - if (keyconf->keylen == WEP_KEY_LEN_128) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyconf->key, keyconf->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -500,7 +500,7 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg 
%d\n", keyconf->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyconf->cipher); break; } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 43e078b87378..21a52ae05c0d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3389,7 +3389,9 @@ static int iwl_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * in 1X mode. * In legacy wep mode, we use another host command to the uCode. */ - if (key->alg == ALG_WEP && !sta && vif->type != NL80211_IFTYPE_AP) { + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + !sta && vif->type != NL80211_IFTYPE_AP) { if (cmd == SET_KEY) is_default_wep_key = !priv->key_mapping_key; else diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 599635547021..149619fb1c07 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -420,7 +420,7 @@ struct iwl_tid_data { }; struct iwl_hw_key { - enum ieee80211_key_alg alg; + u32 cipher; int keylen; u8 keyidx; u8 key[32]; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 7e0829be5e78..d5e8db37b86e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -818,7 +818,7 @@ int iwl_set_default_wep_key(struct iwl_priv *priv, keyconf->flags &= ~IEEE80211_KEY_FLAG_GENERATE_IV; keyconf->hw_key_idx = HW_KEY_DEFAULT; - priv->stations[IWL_AP_ID].keyinfo.alg = ALG_WEP; + priv->stations[IWL_AP_ID].keyinfo.cipher = keyconf->cipher; priv->wep_keys[keyconf->keyidx].key_size = keyconf->keylen; memcpy(&priv->wep_keys[keyconf->keyidx].key, &keyconf->key, @@ -856,7 +856,7 @@ static int iwl_set_wep_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; priv->stations[sta_id].keyinfo.keyidx = keyconf->keyidx; @@ -906,7 +906,7 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, keyconf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, @@ -955,7 +955,7 @@ static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = 16; if ((priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_ENCRYPT_MSK) @@ -1090,24 +1090,26 @@ int iwl_set_dynamic_key(struct iwl_priv *priv, priv->key_mapping_key++; keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: IWL_ERR(priv, - "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + "Unknown alg: %s cipher = %x\n", 
__func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: cipher=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b14eaf91c7c2..faa2e0037e10 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -152,7 +152,7 @@ static int iwl3945_set_ccmp_dynamic_key_info(struct iwl_priv *priv, key_flags &= ~STA_KEY_FLG_INVALID; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, keyconf->keylen); @@ -223,23 +223,25 @@ static int iwl3945_set_dynamic_key(struct iwl_priv *priv, keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl3945_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl3945_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl3945_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: - IWL_ERR(priv, "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + IWL_ERR(priv, "Unknown alg: %s alg=%x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: alg=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; @@ -255,10 +257,11 @@ static int iwl3945_remove_static_key(struct iwl_priv *priv) static int iwl3945_set_static_key(struct iwl_priv *priv, struct ieee80211_key_conf *key) { - if (key->alg == ALG_WEP) + if (key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) return -EOPNOTSUPP; - IWL_ERR(priv, "Static key invalid: alg %d\n", key->alg); + IWL_ERR(priv, "Static key invalid: cipher %x\n", key->cipher); return -EINVAL; } @@ -370,23 +373,25 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, struct iwl3945_tx_cmd *tx_cmd = (struct iwl3945_tx_cmd *)cmd->cmd.payload; struct iwl_hw_key *keyinfo = &priv->stations[sta_id].keyinfo; - switch (keyinfo->alg) { - case ALG_CCMP: + tx_cmd->sec_ctl = 0; + + switch (keyinfo->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_WEP: - tx_cmd->sec_ctl = TX_CMD_SEC_WEP | + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: + tx_cmd->sec_ctl |= TX_CMD_SEC_WEP | (info->control.hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT; - if (keyinfo->keylen == 13) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -394,7 +399,7 @@ static void 
iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyinfo->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyinfo->cipher); break; } } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 47db439b63bf..622d27b6d8f2 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -429,8 +429,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, mutex_lock(&priv->conf_mutex); if (cmd == SET_KEY) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: if (!(priv->privacy_caps & (BR_DESC_PRIV_CAP_MICHAEL | BR_DESC_PRIV_CAP_TKIP))) { ret = -EOPNOTSUPP; @@ -439,7 +439,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_TKIPMICHAEL; break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_WEP)) { ret = -EOPNOTSUPP; goto out_unlock; @@ -447,7 +448,7 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_WEP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_AESCCMP)) { ret = -EOPNOTSUPP; goto out_unlock; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 427b46f558ed..e53f8cec7798 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -683,14 +683,15 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb, } } -static u8 p54_convert_algo(enum ieee80211_key_alg alg) +static u8 p54_convert_algo(u32 cipher) { - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return P54_CRYPTO_WEP; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return P54_CRYPTO_TKIPMICHAEL; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return P54_CRYPTO_AESCCMP; default: return 0; @@ -731,7 +732,7 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) if (info->control.hw_key) { crypt_offset = ieee80211_get_hdrlen_from_skb(skb); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { u8 *iv = (u8 *)(skb->data + crypt_offset); /* * The firmware excepts that the IV has to have @@ -827,10 +828,10 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->tries = ridx; txhdr->rts_rate_idx = 0; if (info->control.hw_key) { - txhdr->key_type = p54_convert_algo(info->control.hw_key->alg); + txhdr->key_type = p54_convert_algo(info->control.hw_key->cipher); txhdr->key_len = min((u8)16, info->control.hw_key->keylen); memcpy(txhdr->key, info->control.hw_key->key, txhdr->key_len); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { /* reserve space for the MIC key */ len += 8; memcpy(skb_put(skb, 8), &(info->control.hw_key->key diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index cdaf93f48263..97cf72f8bc12 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -355,7 +355,9 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, * it is known that not work at least on some hardware. * SW crypto will be used in that case. 
*/ - if (key->alg == ALG_WEP && key->keyidx != 0) + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + key->keyidx != 0) return -EOPNOTSUPP; /* diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 583dacd8d241..5e9074bf2b8e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -31,15 +31,14 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return CIPHER_WEP64; - else - return CIPHER_WEP128; - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + return CIPHER_WEP64; + case WLAN_CIPHER_SUITE_WEP104: + return CIPHER_WEP128; + case WLAN_CIPHER_SUITE_TKIP: return CIPHER_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return CIPHER_AES; default: return CIPHER_NONE; @@ -95,7 +94,7 @@ unsigned int rt2x00crypto_tx_overhead(struct rt2x00_dev *rt2x00dev, overhead += key->iv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) overhead += 8; } diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 861a5f33761e..6d31c855fcfe 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -725,8 +725,9 @@ static int wl1251_set_key_type(struct wl1251 *wl, struct ieee80211_key_conf *mac80211_key, const u8 *addr) { - switch (mac80211_key->alg) { - case ALG_WEP: + switch (mac80211_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_WEP_DEFAULT; else @@ -734,7 +735,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_TKIP_MIC_GROUP; else @@ -742,7 +743,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_AES_GROUP; else @@ -750,7 +751,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; break; default: - wl1251_error("Unknown key algo 0x%x", mac80211_key->alg); + wl1251_error("Unknown key cipher 0x%x", mac80211_key->cipher); return -EOPNOTSUPP; } @@ -783,7 +784,7 @@ static int wl1251_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1251_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1251_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1251_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key->alg, key->keyidx, key->keylen, key->flags); + key->cipher, key->keyidx, key->keylen, key->flags); wl1251_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen); if (is_zero_ether_addr(addr)) { diff --git a/drivers/net/wireless/wl12xx/wl1251_tx.c b/drivers/net/wireless/wl12xx/wl1251_tx.c index a38ec199187a..6634b3e27cfc 100644 --- a/drivers/net/wireless/wl12xx/wl1251_tx.c +++ b/drivers/net/wireless/wl12xx/wl1251_tx.c @@ -189,7 +189,7 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb, tx_hdr = (struct tx_double_buffer_desc *) skb->data; if (control->control.hw_key && - control->control.hw_key->alg == ALG_TKIP) { + control->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int 
hdrlen; __le16 fc; u16 length; @@ -399,7 +399,7 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, */ frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc)); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(frame + WL1251_TKIP_IV_SPACE, frame, hdrlen); skb_pull(skb, WL1251_TKIP_IV_SPACE); diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 9d68f0012f05..30194c0f36a9 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -1439,7 +1439,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1271_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1271_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1271_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key_conf->alg, key_conf->keyidx, + key_conf->cipher, key_conf->keyidx, key_conf->keylen, key_conf->flags); wl1271_dump(DEBUG_CRYPT, "KEY: ", key_conf->key, key_conf->keylen); @@ -1455,20 +1455,21 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (ret < 0) goto out_unlock; - switch (key_conf->alg) { - case ALG_WEP: + switch (key_conf->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key_type = KEY_WEP; key_conf->hw_key_idx = key_conf->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key_type = KEY_TKIP; key_conf->hw_key_idx = key_conf->keyidx; tx_seq_32 = WL1271_TX_SECURITY_HI32(wl->tx_security_seq); tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key_type = KEY_AES; key_conf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; @@ -1476,7 +1477,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; default: - wl1271_error("Unknown key algo 0x%x", key_conf->alg); + wl1271_error("Unknown key algo 0x%x", key_conf->cipher); ret = -EOPNOTSUPP; goto out_sleep; diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c index c592cc2e9fe8..dc0b46c93c4b 100644 --- a/drivers/net/wireless/wl12xx/wl1271_tx.c +++ b/drivers/net/wireless/wl12xx/wl1271_tx.c @@ -193,7 +193,7 @@ static int wl1271_tx_frame(struct wl1271 *wl, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) extra = WL1271_TKIP_IV_SPACE; if (info->control.hw_key) { @@ -347,7 +347,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl, /* remove TKIP header space if present */ if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(skb->data + WL1271_TKIP_IV_SPACE, skb->data, hdrlen); skb_pull(skb, WL1271_TKIP_IV_SPACE); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736a..914c747ac3a0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - 
/** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 19c6146010b7..9a35d9e7efd7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -116,7 +116,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; - enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; @@ -125,31 +124,20 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - alg = ALG_WEP; - break; case WLAN_CIPHER_SUITE_TKIP: - alg = ALG_TKIP; - break; - case WLAN_CIPHER_SUITE_CCMP: - alg = ALG_CCMP; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - alg = ALG_AES_CMAC; + case WLAN_CIPHER_SUITE_WEP104: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; break; default: - return -EINVAL; + break; } - /* reject WEP and TKIP keys if WEP failed to initialize */ - if ((alg == ALG_WEP || alg == ALG_TKIP) && - IS_ERR(sdata->local->wep_tx_tfm)) - return -EINVAL; - - key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, - params->seq_len, params->seq); + key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, + params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); @@ -247,10 +235,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, memset(¶ms, 0, sizeof(params)); - switch (key->conf.alg) { - case ALG_TKIP: - params.cipher = WLAN_CIPHER_SUITE_TKIP; + params.cipher = key->conf.cipher; + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; @@ -268,8 +256,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_CCMP: - params.cipher = WLAN_CIPHER_SUITE_CCMP; + case WLAN_CIPHER_SUITE_CCMP: seq[0] = key->u.ccmp.tx_pn[5]; seq[1] = key->u.ccmp.tx_pn[4]; seq[2] = key->u.ccmp.tx_pn[3]; @@ -279,14 +266,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_WEP: - if (key->conf.keylen == 5) - params.cipher = WLAN_CIPHER_SUITE_WEP40; - else - params.cipher = WLAN_CIPHER_SUITE_WEP104; - break; - case ALG_AES_CMAC: - params.cipher = WLAN_CIPHER_SUITE_AES_CMAC; + case WLAN_CIPHER_SUITE_AES_CMAC: seq[0] = key->u.aes_cmac.tx_pn[5]; seq[1] = key->u.aes_cmac.tx_pn[4]; seq[2] = key->u.aes_cmac.tx_pn[3]; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index fa5e76e658ef..1647f8dc5cda 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *alg; + 
char buf[15]; struct ieee80211_key *key = file->private_data; + u32 c = key->conf.cipher; - switch (key->conf.alg) { - case ALG_WEP: - alg = "WEP\n"; - break; - case ALG_TKIP: - alg = "TKIP\n"; - break; - case ALG_CCMP: - alg = "CCMP\n"; - break; - case ALG_AES_CMAC: - alg = "AES-128-CMAC\n"; - break; - default: - return 0; - } - return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg)); + sprintf(buf, "%.2x-%.2x-%.2x:%d\n", + c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } KEY_OPS(algorithm); @@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.tx.iv32, key->u.tkip.tx.iv16); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: tpn = key->u.aes_cmac.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], @@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", @@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.rx[i].iv16); len = p - buf; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, } len = p - buf; break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_CCMP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_AES_CMAC: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 5d5d2a974668..b5a95582d816 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -336,7 +336,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(enum ieee80211_key_alg, alg) + __field(u32, cipher) __field(u8, hw_key_idx) 
__field(u8, flags) __field(s8, keyidx) @@ -346,7 +346,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->alg = key->alg; + __entry->cipher = key->cipher; __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d6dbc8ea4ead..3203d1d3cd38 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -227,9 +227,7 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } } -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq) { @@ -249,15 +247,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.flags = 0; key->flags = 0; - key->conf.alg = alg; + key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key->conf.iv_len = WEP_IV_LEN; key->conf.icv_len = WEP_ICV_LEN; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key->conf.iv_len = TKIP_IV_LEN; key->conf.icv_len = TKIP_ICV_LEN; if (seq) { @@ -269,7 +268,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; if (seq) { @@ -279,7 +278,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, seq[CCMP_PN_LEN - j - 1]; } break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) @@ -290,7 +289,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - if (alg == ALG_CCMP) { + if (cipher == WLAN_CIPHER_SUITE_CCMP) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -303,7 +302,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } - if (alg == ALG_AES_CMAC) { + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. 
@@ -328,9 +327,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->local) ieee80211_key_disable_hw_accel(key); - if (key->conf.alg == ALG_CCMP) + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) ieee80211_debugfs_key_remove(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b665bbb7a471..53b5ce12536f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -123,9 +123,7 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq); /* diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f24a0a1cff1a..ad2427021b26 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -961,7 +961,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * pairwise or station-to-station keys, but for WEP we allow * using a key index as well. */ - if (rx->key && rx->key->conf.alg != ALG_WEP && + if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && !is_multicast_ether_addr(hdr->addr1)) rx->key = NULL; } @@ -977,8 +978,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* the hdr variable is invalid now! */ - switch (rx->key->conf.alg) { - case ALG_WEP: + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: /* Check for weak IVs if possible */ if (rx->sta && ieee80211_is_data(fc) && (!(status->flag & RX_FLAG_IV_STRIPPED) || @@ -988,13 +990,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_wep_decrypt(rx); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: result = ieee80211_crypto_tkip_decrypt(rx); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: result = ieee80211_crypto_ccmp_decrypt(rx); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; } @@ -1291,7 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); - if (rx->key && rx->key->conf.alg == ALG_CCMP && + if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && ieee80211_has_protected(fc)) { int queue = ieee80211_is_mgmt(fc) ? 
NUM_RX_DATA_QUEUES : rx->queue; @@ -1320,7 +1322,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.alg != ALG_CCMP) + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); for (i = CCMP_PN_LEN - 1; i >= 0; i--) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c54db966926b..bc4fefc91663 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -543,15 +543,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (ieee80211_is_auth(hdr->frame_control)) break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -561,7 +562,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) IEEE80211_KEY_FLAG_SW_MGMT) && ieee80211_is_mgmt(hdr->frame_control); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -949,14 +950,15 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) if (!tx->key) return TX_CONTINUE; - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ieee80211_crypto_wep_encrypt(tx); - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return ieee80211_crypto_ccmp_encrypt(tx); - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); } diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9ebc8d8a1f5b..f27484c22b9f 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx) return -1; klen = 3 + key->conf.keylen; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8d59d27d887e..b08ad94b56da 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) int tail; hdr = (struct ieee80211_hdr *)skb->data; - if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !ieee80211_is_data_present(hdr->frame_control)) + if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || + skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; - if (!rx->key || rx->key->conf.alg != ALG_TKIP || + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || !ieee80211_has_protected(hdr->frame_control) || !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; -- cgit v1.2.3 From bfb564e7391340638afe4ad67744a8f3858e7566 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:52 +0000 Subject: core: Factor out flow 
calculation from get_rps_cpu Factor out flow calculation code from get_rps_cpu, since other functions can use the same code. Revisions: v2 (Ben): Separate flow calculation out and use in select queue. v3 (Arnd): Don't re-implement MIN. v4 (Changli): skb->data points to ethernet header in macvtap, and add a fast path. Tested macvtap with this patch. v5 (Changli): - Cache skb->rxhash in skb_get_rxhash - macvtap may not have pow(2) queues, so change code for queue selection. (Arnd): - Use first available queue if all fails. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++ net/core/dev.c | 106 +++++++++++++++++++++++++++++-------------------- 2 files changed, 71 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77eb60d2b496..d8050382b189 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -558,6 +558,15 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +extern __u32 __skb_get_rxhash(struct sk_buff *skb); +static inline __u32 skb_get_rxhash(struct sk_buff *skb) +{ + if (!skb->rxhash) + skb->rxhash = __skb_get_rxhash(skb); + + return skb->rxhash; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..586a11cb4398 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2259,69 +2259,41 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -#ifdef CONFIG_RPS - -/* One global table that all flow-based protocols share. */ -struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; -EXPORT_SYMBOL(rps_sock_flow_table); - /* - * get_rps_cpu is called from netif_receive_skb and returns the target - * CPU from the RPS map of the receiving queue for a given skb. - * rcu_read_lock must be held on entry. + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Returns a non-zero hash number on success + * and 0 on failure. 
*/ -static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow **rflowp) +__u32 __skb_get_rxhash(struct sk_buff *skb) { + int nhoff, hash = 0; struct ipv6hdr *ip6; struct iphdr *ip; - struct netdev_rx_queue *rxqueue; - struct rps_map *map; - struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; - int cpu = -1; u8 ip_proto; - u16 tcpu; u32 addr1, addr2, ihl; union { u32 v32; u16 v16[2]; } ports; - if (skb_rx_queue_recorded(skb)) { - u16 index = skb_get_rx_queue(skb); - if (unlikely(index >= dev->num_rx_queues)) { - WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " - "on queue %u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - goto done; - } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; - - if (!rxqueue->rps_map && !rxqueue->rps_flow_table) - goto done; - - if (skb->rxhash) - goto got_hash; /* Skip hash computation on packet header */ + nhoff = skb_network_offset(skb); switch (skb->protocol) { case __constant_htons(ETH_P_IP): - if (!pskb_may_pull(skb, sizeof(*ip))) + if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) skb->data; + ip = (struct iphdr *) skb->data + nhoff; ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; ihl = ip->ihl; break; case __constant_htons(ETH_P_IPV6): - if (!pskb_may_pull(skb, sizeof(*ip6))) + if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) skb->data; + ip6 = (struct ipv6hdr *) skb->data + nhoff; ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; @@ -2330,6 +2302,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, default: goto done; } + switch (ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -2338,8 +2311,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - if (pskb_may_pull(skb, (ihl * 4) + 4)) { - ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); + if (pskb_may_pull(skb, (ihl * 4) + 4 + nhoff)) { + ports.v32 = * (__force u32 *) (skb->data + nhoff + + (ihl * 4)); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); break; @@ -2352,11 +2326,55 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* get a consistent hash (same value on both flow directions) */ if (addr2 < addr1) swap(addr1, addr2); - skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); - if (!skb->rxhash) - skb->rxhash = 1; -got_hash: + hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); + if (!hash) + hash = 1; + +done: + return hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +#ifdef CONFIG_RPS + +/* One global table that all flow-based protocols share. */ +struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; +EXPORT_SYMBOL(rps_sock_flow_table); + +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. + * rcu_read_lock must be held on entry. 
+ */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow **rflowp) +{ + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + struct rps_dev_flow_table *flow_table; + struct rps_sock_flow_table *sock_flow_table; + int cpu = -1; + u16 tcpu; + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map && !rxqueue->rps_flow_table) + goto done; + + if (!skb_get_rxhash(skb)) + goto done; + flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { -- cgit v1.2.3 From 1565c7c1c4c8e931bdba66abc8aa6f141a406872 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:59 +0000 Subject: macvtap: Implement multiqueue for macvtap driver Implement multiqueue facility for macvtap driver. The idea is that a macvtap device can be opened multiple times and the fds can be used to register, e.g., as backends for vhost. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 99 ++++++++++++++++++++++++++++++++++++++-------- include/linux/if_macvlan.h | 9 ++++- 2 files changed, 90 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b1c54a9c6ef..42567279843e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -84,26 +84,45 @@ static const struct proto_ops macvtap_socket_ops; static DEFINE_SPINLOCK(macvtap_lock); /* - * Choose the next free queue, for now there is only one + * get_slot: return a [unused/occupied] slot in vlan->taps[]: + * - if 'q' is NULL, return the first empty slot; + * - otherwise, return the slot this pointer occupies. */ +static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) +{ + int i; + + for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { + if (rcu_dereference(vlan->taps[i]) == q) + return i; + } + + /* Should never happen */ + BUG_ON(1); +} + static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); + int index; int err = -EBUSY; spin_lock(&macvtap_lock); - if (rcu_dereference(vlan->tap)) + if (vlan->numvtaps == MAX_MACVTAP_QUEUES) goto out; err = 0; + index = get_slot(vlan, NULL); rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->tap, q); + rcu_assign_pointer(vlan->taps[index], q); sock_hold(&q->sk); q->file = file; file->private_data = q; + vlan->numvtaps++; + out: spin_unlock(&macvtap_lock); return err; @@ -124,9 +143,12 @@ static void macvtap_put_queue(struct macvtap_queue *q) spin_lock(&macvtap_lock); vlan = rcu_dereference(q->vlan); if (vlan) { - rcu_assign_pointer(vlan->tap, NULL); + int index = get_slot(vlan, q); + + rcu_assign_pointer(vlan->taps[index], NULL); rcu_assign_pointer(q->vlan, NULL); sock_put(&q->sk); + --vlan->numvtaps; } spin_unlock(&macvtap_lock); @@ -136,39 +158,82 @@ static void macvtap_put_queue(struct macvtap_queue *q) } /* - * Since we only support one queue, just dereference the pointer. + * Select a queue based on the rxq of the device on which this packet + * arrived. If the incoming device is not mq, calculate a flow hash + * to select a queue. If all fails, find the first available queue. 
+ * Cache vlan->numvtaps since it can become zero during the execution + * of this function. */ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, struct sk_buff *skb) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *tap = NULL; + int numvtaps = vlan->numvtaps; + __u32 rxq; + + if (!numvtaps) + goto out; + + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq >= numvtaps)) + rxq -= numvtaps; + + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + goto out; + } + + /* Check if we can use flow to select a queue */ + rxq = skb_get_rxhash(skb); + if (rxq) { + tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + if (tap) + goto out; + } - return rcu_dereference(vlan->tap); + /* Everything failed - find first available queue */ + for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + break; + } + +out: + return tap; } /* * The net_device is going away, give up the reference - * that it holds on the queue (all the queues one day) - * and safely set the pointer from the queues to NULL. + * that it holds on all queues and safely set the pointer + * from the queues to NULL. */ static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q; + struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; + int i, j = 0; + /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - q = rcu_dereference(vlan->tap); - if (!q) { - spin_unlock(&macvtap_lock); - return; + for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { + q = rcu_dereference(vlan->taps[i]); + if (q) { + qlist[j++] = q; + rcu_assign_pointer(vlan->taps[i], NULL); + rcu_assign_pointer(q->vlan, NULL); + vlan->numvtaps--; + } } - - rcu_assign_pointer(vlan->tap, NULL); - rcu_assign_pointer(q->vlan, NULL); + BUG_ON(vlan->numvtaps != 0); spin_unlock(&macvtap_lock); synchronize_rcu(); - sock_put(&q->sk); + + for (--j; j >= 0; j--) + sock_put(&qlist[j]->sk); } /* diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b302290..8a2fd66a8b5f 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queues, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3 From 1b2f1489633888d4a06028315dc19d65768a1c05 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 14 Aug 2010 20:20:34 +1000 Subject: drm: block userspace under-allocating buffer and having drivers overwrite it (v2) With the current drm ioctls (screwed up, but they are ABI), Linus pointed out that we could allow userspace to specify the allocation size, but we pass it to the driver which then uses it blindly to store a struct. Now if userspace specifies the allocation size as smaller than the driver needs, the driver can possibly overwrite memory. 
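For illustration only, here is a minimal standalone sketch of the size-handling pattern the patch adopts; struct drv_args and drv_handler are hypothetical stand-ins, not code from the DRM tree:

#include <stdlib.h>
#include <string.h>

/* Hypothetical ioctl argument struct a driver expects to find in kdata. */
struct drv_args {
	unsigned int handles[8];	/* 32 bytes */
};

static void drv_handler(void *kdata)
{
	/* The driver fills its whole struct, ignoring whatever size
	 * userspace encoded in _IOC_SIZE(cmd). */
	memset(kdata, 0, sizeof(struct drv_args));
}

int main(void)
{
	size_t usize = 8;	/* allocation size chosen by userspace */
	size_t drv_size = sizeof(struct drv_args);	/* size the driver needs */
	size_t asize = usize > drv_size ? usize : drv_size;
	void *kdata;

	/* Allocating only usize, as before the patch, would let
	 * drv_handler() write 24 bytes past the end of the buffer.
	 * Allocating max(usize, drv_size) keeps the driver's blind
	 * write in bounds, while the copies in and out of the buffer
	 * remain limited to usize. */
	kdata = calloc(1, asize);
	if (!kdata)
		return 1;
	drv_handler(kdata);
	free(kdata);
	return 0;
}

Bounding the user copies by usize while allocating at least drv_size is what lets the ioctl structs grow on either side without old binaries overflowing the kernel buffer.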
This patch restructures the driver ioctls so we store the structure size we are expecting, and make sure we allocate at least that size. The copies from/to userspace are still restricted to the size the user specifies; this allows ioctl structs to grow on both sides of the equation. Up until now we didn't really use the DRM_IOCTL defines in the kernel, so this cleans them up and adds them for nouveau. v2: fix nouveau pushbuf arg (thanks to Ben for pointing it out) Reported-by: Linus Torvalds Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 23 ++++++--- drivers/gpu/drm/i810/i810_dma.c | 30 ++++++------ drivers/gpu/drm/i830/i830_dma.c | 28 +++++------ drivers/gpu/drm/i915/i915_dma.c | 80 +++++++++++++++---------------- drivers/gpu/drm/mga/mga_state.c | 26 +++++----- drivers/gpu/drm/nouveau/nouveau_channel.c | 24 +++++----- drivers/gpu/drm/r128/r128_state.c | 35 +++++++------- drivers/gpu/drm/radeon/radeon_kms.c | 78 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon_state.c | 56 +++++++++++----------- drivers/gpu/drm/savage/savage_bci.c | 8 ++-- drivers/gpu/drm/sis/sis_mm.c | 12 ++--- drivers/gpu/drm/via/via_dma.c | 28 +++++------ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 34 ++++++------- include/drm/drmP.h | 6 ++- include/drm/i830_drm.h | 28 +++++------ include/drm/i915_drm.h | 1 + include/drm/mga_drm.h | 2 +- include/drm/nouveau_drm.h | 13 +++++ include/drm/radeon_drm.h | 4 +- include/drm/savage_drm.h | 8 ++-- 20 files changed, 275 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 3644c94c0a17..84da748555bc 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -55,6 +55,9 @@ static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); +#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0} + /** Ioctl table */ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, 0), @@ -421,6 +424,7 @@ long drm_ioctl(struct file *filp, int retcode = -EINVAL; char stack_kdata[128]; char *kdata = NULL; + unsigned int usize, asize; dev = file_priv->minor->dev; atomic_inc(&dev->ioctl_count); @@ -436,11 +440,18 @@ long drm_ioctl(struct file *filp, ((nr < DRM_COMMAND_BASE) || (nr >= DRM_COMMAND_END))) goto err_i1; if ((nr >= DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END) && - (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) + (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) { + u32 drv_size; ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + drv_size = _IOC_SIZE(ioctl->cmd_drv); + usize = asize = _IOC_SIZE(cmd); + if (drv_size > asize) + asize = drv_size; + } else if ((nr >= DRM_COMMAND_END) || (nr < DRM_COMMAND_BASE)) { ioctl = &drm_ioctls[nr]; cmd = ioctl->cmd; + usize = asize = _IOC_SIZE(cmd); } else goto err_i1; @@ -460,10 +471,10 @@ long drm_ioctl(struct file *filp, retcode = -EACCES; } else { if (cmd & (IOC_IN | IOC_OUT)) { - if (_IOC_SIZE(cmd) <= sizeof(stack_kdata)) { + if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; } else { - kdata = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL); + kdata = kmalloc(asize, GFP_KERNEL); if (!kdata) { retcode = -ENOMEM; goto err_i1; } } if (cmd & IOC_IN) { if (copy_from_user(kdata, (void __user *)arg, - _IOC_SIZE(cmd)) != 0) { + usize) != 0) { retcode = -EFAULT; goto err_i1; } } else - memset(kdata, 0, _IOC_SIZE(cmd)); + memset(kdata, 0, usize); if (ioctl->flags & 
DRM_UNLOCKED) retcode = func(dev, kdata, file_priv); @@ -490,7 +501,7 @@ long drm_ioctl(struct file *filp, if (cmd & IOC_OUT) { if (copy_to_user((void __user *)arg, kdata, - _IOC_SIZE(cmd)) != 0) + usize) != 0) retcode = -EFAULT; } } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 0e6c131313d9..61b4caf220fa 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -1255,21 +1255,21 @@ long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i810_ioctls[] = { - DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), }; int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls); diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c index 5168862c9227..671aa18415ac 100644 --- a/drivers/gpu/drm/i830/i830_dma.c +++ b/drivers/gpu/drm/i830/i830_dma.c @@ -1524,20 +1524,20 @@ long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i830_ioctls[] = { - DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, 
DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), }; int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f19ffe87af3c..a2d3509c393b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2360,46 +2360,46 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc i915_ioctls[] = { - DRM_IOCTL_DEF(DRM_I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_FLUSH, i915_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FLIP, i915_flip_bufs, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_GETPARAM, i915_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_ALLOC, i915_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FREE, i915_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH ), - DRM_IOCTL_DEF(DRM_I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), - 
DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_FLUSH, i915_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FLIP, i915_flip_bufs, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_ALLOC, i915_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FREE, i915_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + 
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c index fff82045c427..9ce2827f8c00 100644 --- a/drivers/gpu/drm/mga/mga_state.c +++ b/drivers/gpu/drm/mga/mga_state.c @@ -1085,19 +1085,19 @@ file_priv) } struct drm_ioctl_desc mga_ioctls[] = { - DRM_IOCTL_DEF(DRM_MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_MGA_FLUSH, mga_dma_flush, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_RESET, mga_dma_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SWAP, mga_dma_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_CLEAR, mga_dma_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_VERTEX, mga_dma_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_INDICES, mga_dma_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_ILOAD, mga_dma_iload, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_BLIT, mga_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_GETPARAM, mga_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SET_FENCE, mga_set_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_FLUSH, mga_dma_flush, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_RESET, mga_dma_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SWAP, mga_dma_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_CLEAR, mga_dma_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_VERTEX, mga_dma_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_INDICES, mga_dma_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_ILOAD, mga_dma_iload, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_BLIT, mga_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_GETPARAM, mga_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SET_FENCE, mga_set_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), }; int mga_max_ioctl = 
DRM_ARRAY_SIZE(mga_ioctls); diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 90fdcda332be..0480f064f2c1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -426,18 +426,18 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data, ***********************************/ struct drm_ioctl_desc nouveau_ioctls[] = { - DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), }; int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls); diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index 077af1f2f9b4..a9e33ce65918 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1639,30 +1639,29 @@ void r128_driver_preclose(struct drm_device *dev, struct drm_file *file_priv) r128_do_cleanup_pageflip(dev); } } - void r128_driver_lastclose(struct drm_device *dev) { r128_do_cleanup_cce(dev); } struct drm_ioctl_desc r128_ioctls[] = { - DRM_IOCTL_DEF(DRM_R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_RESET, r128_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FULLSCREEN, r128_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_SWAP, r128_cce_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FLIP, r128_cce_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_CLEAR, 
r128_cce_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_VERTEX, r128_cce_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDICES, r128_cce_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_BLIT, r128_cce_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_DEPTH, r128_cce_depth, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_STIPPLE, r128_cce_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_GETPARAM, r128_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_RESET, r128_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FULLSCREEN, r128_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_SWAP, r128_cce_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FLIP, r128_cce_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_CLEAR, r128_cce_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_VERTEX, r128_cce_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDICES, r128_cce_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_BLIT, r128_cce_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_DEPTH, r128_cce_depth, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_STIPPLE, r128_cce_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH), }; int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index b1c8ace5f080..27435db0aa48 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -323,45 +323,45 @@ KMS_INVALID_IOCTL(radeon_surface_free_kms) struct drm_ioctl_desc radeon_ioctls_kms[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, 
radeon_mem_free_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), /* KMS */ - DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, 
DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index b3ba44c0a818..4ae5a3d1074e 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3228,34 +3228,34 @@ void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc radeon_ioctls[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c index f576232846c3..6756c97899f1 100644 --- a/drivers/gpu/drm/savage/savage_bci.c +++ b/drivers/gpu/drm/savage/savage_bci.c @@ -1082,10 +1082,10 @@ void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc savage_ioctls[] = { - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), }; int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls); diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c index 07d0f2979cac..7fe2b63412ce 100644 --- a/drivers/gpu/drm/sis/sis_mm.c +++ b/drivers/gpu/drm/sis/sis_mm.c @@ -320,12 +320,12 @@ void sis_reclaim_buffers_locked(struct drm_device *dev, } struct drm_ioctl_desc sis_ioctls[] = { - DRM_IOCTL_DEF(DRM_SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), - 
DRM_IOCTL_DEF(DRM_SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), }; int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls); diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c index 68dda74a50ae..cc0ffa9abd00 100644 --- a/drivers/gpu/drm/via/via_dma.c +++ b/drivers/gpu/drm/via/via_dma.c @@ -722,20 +722,20 @@ static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file * } struct drm_ioctl_desc via_ioctls[] = { - DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) + DRM_IOCTL_DEF_DRV(VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FREEMEM, via_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_INIT, via_dma_init, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FLUSH, via_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_BLIT, via_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) }; int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9dd395b90216..72ec2e2b6e97 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -99,47 +99,47 @@ */ #define VMW_IOCTL_DEF(ioctl, func, flags) \ - [DRM_IOCTL_NR(ioctl) - DRM_COMMAND_BASE] = {ioctl, flags, func} + [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_##ioctl, flags, func, DRM_IOCTL_##ioctl} /** * Ioctl definitions. 
*/ static struct drm_ioctl_desc vmw_ioctls[] = { - VMW_IOCTL_DEF(DRM_IOCTL_VMW_GET_PARAM, vmw_getparam_ioctl, + VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, + VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CURSOR_BYPASS, + VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CONTROL_STREAM, vmw_overlay_ioctl, + VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, + VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_STREAM, vmw_stream_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_CONTEXT, vmw_context_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_SURFACE, vmw_surface_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_REF_SURFACE, vmw_surface_reference_ioctl, + VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_EXECBUF, vmw_execbuf_ioctl, + VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, + VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FENCE_WAIT, vmw_fence_wait_ioctl, + VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, + VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED) }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 2a512bc0d4ab..7809d230adee 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -305,14 +305,16 @@ struct drm_ioctl_desc { unsigned int cmd; int flags; drm_ioctl_t *func; + unsigned int cmd_drv; }; /** * Creates a driver or general drm_ioctl_desc array entry for the given * ioctl, for use by drm_ioctl(). 
*/ -#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ - [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags} + +#define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl} struct drm_magic_entry { struct list_head head; diff --git a/include/drm/i830_drm.h b/include/drm/i830_drm.h index 4b00d2dd4f68..61315c29b8f3 100644 --- a/include/drm/i830_drm.h +++ b/include/drm/i830_drm.h @@ -264,20 +264,20 @@ typedef struct _drm_i830_sarea { #define DRM_I830_GETPARAM 0x0c #define DRM_I830_SETPARAM 0x0d -#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_INIT, drm_i830_init_t) -#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_VERTEX, drm_i830_vertex_t) -#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_CLEAR, drm_i830_clear_t) -#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLUSH) -#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_GETAGE) -#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETBUF, drm_i830_dma_t) -#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_SWAP) -#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_COPY, drm_i830_copy_t) -#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_DOCOPY) -#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLIP) -#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_EMIT, drm_i830_irq_emit_t) -#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_WAIT, drm_i830_irq_wait_t) -#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETPARAM, drm_i830_getparam_t) -#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_SETPARAM, drm_i830_setparam_t) +#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I830_INIT, drm_i830_init_t) +#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_I830_VERTEX, drm_i830_vertex_t) +#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_I830_CLEAR, drm_i830_clear_t) +#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLUSH) +#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_I830_GETAGE) +#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETBUF, drm_i830_dma_t) +#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_SWAP) +#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_I830_COPY, drm_i830_copy_t) +#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_I830_DOCOPY) +#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLIP) +#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_IRQ_EMIT, drm_i830_irq_emit_t) +#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I830_IRQ_WAIT, drm_i830_irq_wait_t) +#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETPARAM, drm_i830_getparam_t) +#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_SETPARAM, drm_i830_setparam_t) typedef struct _drm_i830_clear { int clear_color; diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 7f0028e1010b..000357bf3ac8 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define 
DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h index 3ffbc4798afa..c16097f99be0 100644 --- a/include/drm/mga_drm.h +++ b/include/drm/mga_drm.h @@ -248,7 +248,7 @@ typedef struct _drm_mga_sarea { #define DRM_MGA_DMA_BOOTSTRAP 0x0c #define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) -#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t) +#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock) #define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) #define DRM_IOCTL_MGA_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_MGA_SWAP) #define DRM_IOCTL_MGA_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t) diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index fe917dee723a..01a714119506 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -197,4 +197,17 @@ struct drm_nouveau_sarea { #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +#define DRM_IOCTL_NOUVEAU_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam) +#define DRM_IOCTL_NOUVEAU_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam) +#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc) +#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free) +#define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc) +#define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc) +#define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free) +#define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) +#define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) + #endif /* __NOUVEAU_DRM_H__ */ diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 0acaf8f91437..10f8b53bdd40 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -547,8 +547,8 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) #define 
DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
#define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-#define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
-#define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
+#define DRM_IOCTL_RADEON_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)

typedef struct drm_radeon_init {
diff --git a/include/drm/savage_drm.h b/include/drm/savage_drm.h
index 8a576ef01821..4863cf6bf96f 100644
--- a/include/drm/savage_drm.h
+++ b/include/drm/savage_drm.h
@@ -63,10 +63,10 @@ typedef struct _drm_savage_sarea {
#define DRM_SAVAGE_BCI_EVENT_EMIT 0x02
#define DRM_SAVAGE_BCI_EVENT_WAIT 0x03

-#define DRM_IOCTL_SAVAGE_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
-#define DRM_IOCTL_SAVAGE_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
-#define DRM_IOCTL_SAVAGE_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
-#define DRM_IOCTL_SAVAGE_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+#define DRM_IOCTL_SAVAGE_BCI_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_BCI_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)

#define SAVAGE_DMA_PCI 1
#define SAVAGE_DMA_AGP 3
-- cgit v1.2.3

From f1b499f029c5dde85d46a8811353c62f29157541 Mon Sep 17 00:00:00 2001
From: John Kacur
Date: Thu, 5 Aug 2010 17:10:53 +0200
Subject: lockdep: Remove __debug_show_held_locks

There is no longer any functional difference between
__debug_show_held_locks() and debug_show_held_locks(), so remove
the former.
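For illustration, a minimal sketch of what a call site looks like after
this change. The helper below is hypothetical and not part of this patch;
sched_show_task() and debug_show_held_locks() are the in-tree interfaces,
and the !CONFIG_LOCKDEP stub in <linux/debug_locks.h> keeps the second
call a no-op on kernels without lockdep:

#include <linux/sched.h>
#include <linux/debug_locks.h>

/*
 * Hypothetical diagnostic helper: dump a stuck task's backtrace and
 * the locks it holds.  As the comment in kernel/lockdep.c warns, this
 * is only safe when @t is known not to be running in parallel.
 */
static void report_stuck_task(struct task_struct *t)
{
	sched_show_task(t);
	debug_show_held_locks(t);	/* was __debug_show_held_locks(t) */
}

This mirrors the kernel/hung_task.c hunk below, where the __-prefixed
variant is replaced one for one.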
Signed-off-by: John Kacur
Cc: Peter Zijlstra
LKML-Reference: <1281021054-4228-1-git-send-email-jkacur@redhat.com>
Signed-off-by: Ingo Molnar
---
 include/linux/debug_locks.h | 5 -----
 kernel/hung_task.c | 2 +-
 kernel/lockdep.c | 8 +-------
 3 files changed, 2 insertions(+), 13 deletions(-)
(limited to 'include')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 29b3ce3f2a1d..2833452ea01c 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -49,7 +49,6 @@ struct task_struct;
#ifdef CONFIG_LOCKDEP
extern void debug_show_all_locks(void);
-extern void __debug_show_held_locks(struct task_struct *task);
extern void debug_show_held_locks(struct task_struct *task);
extern void debug_check_no_locks_freed(const void *from, unsigned long len);
extern void debug_check_no_locks_held(struct task_struct *task);
@@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void)
{
}
-static inline void __debug_show_held_locks(struct task_struct *task)
-{
-}
-
static inline void debug_show_held_locks(struct task_struct *task)
{
}
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 0c642d51aac2..bca942379559 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n");
sched_show_task(t);
- __debug_show_held_locks(t);
+ debug_show_held_locks(t);
touch_nmi_watchdog();
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f2852a510232..84baa71cfda5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3775,7 +3775,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks);
 * Careful: only use this function if you are sure that
 * the task cannot run in parallel!
 */
-void __debug_show_held_locks(struct task_struct *task)
+void debug_show_held_locks(struct task_struct *task)
{
if (unlikely(!debug_locks)) {
printk("INFO: lockdep is turned off.\n");
@@ -3783,12 +3783,6 @@ void __debug_show_held_locks(struct task_struct *task)
}
lockdep_print_held_locks(task);
}
-EXPORT_SYMBOL_GPL(__debug_show_held_locks);
-
-void debug_show_held_locks(struct task_struct *task)
-{
-	__debug_show_held_locks(task);
-}
EXPORT_SYMBOL_GPL(debug_show_held_locks);

void lockdep_sys_exit(void)
-- cgit v1.2.3

From 99c796df94afca5256860dd4760017f1dbb3480c Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 17 Aug 2010 22:13:22 +0100
Subject: VIDEO: amba clcd: don't disable an already disabled clock

Fix the clock enable/disable tracking in the AMBA CLCD driver so that
the driver doesn't try to disable an already disabled clock, thereby
causing the clock (if shared) to become unbalanced. This resolves a
problem with CLCD on LPC32xx ARM platforms.

Reported-by: Kevin Wells
Signed-off-by: Russell King
---
 drivers/video/amba-clcd.c | 10 ++++++++--
 include/linux/amba/clcd.h | 1 +
 2 files changed, 9 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index afe21e6eb544..1c2c68356ea7 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb)
/*
 * Disable CLCD clock source.
 */
- clk_disable(fb->clk);
+ if (fb->clk_enabled) {
+ fb->clk_enabled = false;
+ clk_disable(fb->clk);
+ }
}

static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
@@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
{
/*
 * Enable the CLCD clock source.
 */
- clk_enable(fb->clk);
+ if (!fb->clk_enabled) {
+ fb->clk_enabled = true;
+ clk_enable(fb->clk);
+ }

/*
 * Bring up by first enabling..
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index ca16c3801a1e..be33b3affc8a 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -150,6 +150,7 @@ struct clcd_fb {
 u16 off_cntl;
 u32 clcd_cntl;
 u32 cmap[16];
+ bool clk_enabled;
};

static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs)
-- cgit v1.2.3

From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 17 Aug 2010 23:52:56 +0100
Subject: Make do_execve() take a const filename pointer

Make do_execve() take a const filename pointer so that kernel_execve()
compiles correctly on ARM:

arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type

This also requires the argv and envp arguments to be consted twice, once
for the pointer array and once for the strings the array points to. This
is because do_execve() passes a pointer to the filename (now const) to
copy_strings_kernel(). A simpler alternative would be to cast the
filename pointer in do_execve() when it's passed to copy_strings_kernel().

do_execve() may not change any of the strings it is passed as part of
the argv or envp lists, as some of them are in .rodata, so marking these
strings as const should be fine.

Further, kernel_execve() and sys_execve() need to be changed to match.

This has been test-built on x86_64, frv, arm and mips.

Signed-off-by: David Howells
Tested-by: Ralf Baechle
Acked-by: Russell King
Signed-off-by: Linus Torvalds
---
 arch/alpha/kernel/process.c | 5 +++--
 arch/arm/kernel/sys_arm.c | 14 +++++++++-----
 arch/avr32/kernel/process.c | 5 +++--
 arch/avr32/kernel/sys_avr32.c | 4 +++-
 arch/blackfin/kernel/process.c | 4 +++-
 arch/cris/arch-v10/kernel/process.c | 4 +++-
 arch/cris/arch-v32/kernel/process.c | 6 ++++--
 arch/frv/kernel/process.c | 5 +++--
 arch/h8300/kernel/process.c | 5 ++++-
 arch/h8300/kernel/sys_h8300.c | 4 +++-
 arch/ia64/kernel/process.c | 4 +++-
 arch/m32r/kernel/process.c | 4 ++--
 arch/m32r/kernel/sys_m32r.c | 4 +++-
 arch/m68k/kernel/process.c | 4 +++-
 arch/m68k/kernel/sys_m68k.c | 4 +++-
 arch/m68knommu/kernel/process.c | 4 +++-
 arch/m68knommu/kernel/sys_m68k.c | 4 +++-
 arch/microblaze/kernel/sys_microblaze.c | 10 +++++++---
 arch/mips/kernel/syscall.c | 10 +++++++---
 arch/mn10300/kernel/process.c | 4 ++--
 arch/parisc/hpux/fs.c | 6 ++++--
 arch/parisc/kernel/process.c | 15 ++++++++++-----
 arch/powerpc/kernel/process.c | 5 +++--
 arch/s390/kernel/process.c | 5 +++--
 arch/score/kernel/sys_score.c | 10 +++++++---
 arch/sh/kernel/process_32.c | 7 ++++---
 arch/sh/kernel/process_64.c | 4 ++--
 arch/sh/kernel/sys_sh32.c | 4 +++-
 arch/sh/kernel/sys_sh64.c | 4 +++-
 arch/sparc/kernel/process_32.c | 6 ++++--
 arch/sparc/kernel/process_64.c | 4 ++--
 arch/sparc/kernel/sys_sparc_32.c | 4 +++-
 arch/sparc/kernel/sys_sparc_64.c | 4 +++-
 arch/tile/kernel/process.c | 5 +++--
 arch/um/kernel/exec.c | 5 +++--
 arch/um/kernel/syscall.c | 4 +++-
 arch/x86/include/asm/syscalls.h | 5 +++--
 arch/x86/kernel/process.c | 5 +++--
 arch/x86/kernel/sys_i386_32.c | 4 +++-
 arch/xtensa/kernel/process.c | 5 +++--
 fs/binfmt_misc.c | 2 +-
 fs/binfmt_script.c | 3 ++-
 fs/exec.c | 21 +++++++++++----------
 include/linux/binfmts.h | 7 ++++---
 include/linux/sched.h | 4 +++-
 include/linux/syscalls.h | 2 +-
 init/do_mounts_initrd.c | 7 ++++---
 init/main.c | 6 +++---
 kernel/kmod.c | 4 +++-
security/commoncap.c | 2 +- 50 files changed, 179 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88e608aebc8c..842dba308eab 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp); * sys_execve() executes a new program. */ asmlinkage int -do_sys_execve(const char __user *ufilename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char *filename; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 5b7c541a4c63..62e7c61d0342 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) /* sys_execve() executes a new program. * This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +asmlinkage int sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char * filename; @@ -78,14 +79,17 @@ out: return error; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { struct pt_regs regs; int ret; memset(®s, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, (char __user * __user *)argv, - (char __user * __user *)envp, ®s); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, ®s); if (ret < 0) goto out; diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index e5daddff397d..9c46aaad11ce 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) } asmlinkage int sys_execve(const char __user *ufilename, - char __user *__user *uargv, - char __user *__user *uenvp, struct pt_regs *regs) + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 459349b5ed5a..62635a09ae3e 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -7,7 +7,9 @@ */ #include -int kernel_execve(const char *file, char **argv, char **envp) +int kernel_execve(const char *file, + const char *const *argv, + const char *const *envp) { register long scno asm("r8") = __NR_execve; register long sc1 asm("r12") = (long)file; diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index a566f61c002a..01f98cb964d2 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char *filename; diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 93f0f64b1326..9a57db6907f5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *fname, char **argv, char **envp, +asmlinkage int sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 2661a9529d70..562f84718906 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp, /* sys_execve() executes a new program. */ asmlinkage int -sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp, - struct pt_regs *regs) +sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 428931cf2f0c..2b63b0191f52 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8b7b78d77d5c..97478138e361 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp, + int dummy, ...) { int error; char * filename; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index f9b3f44da69f..dc1ac0243b78 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long res __asm__("er0"); register char *const *_c __asm__("er3") = envp; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a879c03b7f1c..16f1c7b04c69 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) } long -sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp, +sys_execve (const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { char *fname; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 8665a4d868ec..422bea9f1dbc 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2, * sys_execve() executes a new program. */ asmlinkage int sys_execve(const char __user *ufilename, - char __user * __user *uargv, - char __user * __user *uenvp, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs regs) { diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 0a00f467edfa..d841fb6cc703 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __scno __asm__ ("r7") = __NR_execve; register long __arg3 __asm__ ("r2") = (long)(envp); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 221d0b71ce39..18732ab23292 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 77896692eb0a..2f431ece7b5f 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68cd026..4d090d3c0897 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -350,7 +350,9 @@ void dump(struct pt_regs *fp) /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char *name, char **argv, char **envp) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp) { int error; char * filename; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index d65e9c4c930c..68488ae47f0a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 6abab6ebedbe..2250fe9d269b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs return do_fork(flags, stack, regs, 0, NULL, NULL); } -asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +asmlinkage long microblaze_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) { int error; char *filename; @@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register const char *__a __asm__("r5") = filename; register const void *__b __asm__("r6") = argv; diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index bddce0bca195..1dc6edff45e0 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user *__user *) (long)regs.regs[5], - (char __user *__user *) (long)regs.regs[6], ®s); + error = do_execve(filename, + (const char __user *const __user *) (long)regs.regs[5], + (const char __user *const __user *) (long)regs.regs[6], + ®s); putname(filename); out: @@ -436,7 +438,9 @@ asmlinkage void bad_stack(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __a0 asm("$4") = (unsigned long) filename; register unsigned long __a1 asm("$5") = (unsigned long) argv; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 762eb325b949..f48373e2bc1c 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void) } asmlinkage long sys_execve(const char __user *name, - char __user * __user *argv, - char __user * __user *envp) + const char __user *const __user *argv, + const char __user *const __user *envp) { char *filename; int error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 1444875a7611..0dc8543acb4f 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 76332dadc6e9..4b4b9181a1a0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); out: return error; } -extern int __execve(const char *filename, char *const argv[], - char *const envp[], struct task_struct *task); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +extern int __execve(const char *filename, + const char *const argv[], + const char *const envp[], struct task_struct *task); +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { return __execve(filename, argv, envp, current); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index feacfb789686..91356ffda2ca 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); - error = do_execve(filename, (char __user * __user *) a1, - (char __user * __user *) a2, regs); + error = do_execve(filename, + (const char __user *const __user *) a1, + (const char __user *const __user *) a2, regs); putname(filename); out: return error; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7eafaf2662b9..d3a2d1c6438e 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -267,8 +267,9 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. 
*/ -SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv, - char __user * __user *, envp) +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 651096ff8db4..e478bf9a7e91 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs) if (IS_ERR(filename)) return error; - error = do_execve(filename, (char __user *__user*)regs->regs[5], - (char __user *__user *) regs->regs[6], regs); + error = do_execve(filename, + (const char __user *const __user *)regs->regs[5], + (const char __user *const __user *)regs->regs[6], + regs); putname(filename); return error; @@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __r4 asm("r4") = (unsigned long) filename; register unsigned long __r5 asm("r5") = (unsigned long) argv; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 052981972ae6..762a13984bbd 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv, - char __user * __user *uenvp, unsigned long r7, - struct pt_regs __regs) +asmlinkage int sys_execve(const char __user *ufilename, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + unsigned long r7, struct pt_regs __regs) { struct pt_regs *regs = RELOC_HIDE(&__regs, 0); int error; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 68d128d651b3..210c1cabcb7f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv, goto out; error = do_execve(filename, - (char __user * __user *)uargv, - (char __user * __user *)uenvp, + (const char __user *const __user *)uargv, + (const char __user *const __user *)uenvp, pregs); putname(filename); out: diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index eb68bfdd86e6..f56b6fe5c5d0 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __sc0 __asm__ ("r3") = __NR_execve; register long __sc4 __asm__ ("r4") = (long) filename; diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 287235768bc5..c5a38c4bf410 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -33,7 +33,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve); register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 40e29fc8a4d6..17529298c50a 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if(IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *)regs->u_regs[base + UREG_I1], - (char __user * __user *)regs->u_regs[base + UREG_I2], + (const char __user *const __user *) + regs->u_regs[base + UREG_I1], + (const char __user *const __user *) + regs->u_regs[base + UREG_I2], regs); putname(filename); out: diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..485f54748384 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I1], - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I2], regs); putname(filename); if (!error) { diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ee995b7dae7e..50794137d710 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -282,7 +282,9 @@ out: * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3d435c42e6db..f836f4e93afe 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad0acdc..985cc28c74c5 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. 
*/ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 59b20d93b6d4..cd145eda3579 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_SP(regs) = esp; } -static long execve1(const char *file, char __user * __user *argv, - char __user *__user *env) +static long execve1(const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 7427c0b1930c..5ddb246626db 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { mm_segment_t fs; int ret; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index feb2ff9bfc2d..f1d8b441fc77 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(const char __user *, char __user * __user *, - char __user * __user *, struct pt_regs *); +long sys_execve(const char __user *, + const char __user *const __user *, + const char __user *const __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64ecaf0af9af..57d1868a86aa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552bb412c..d5e06624e34a 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -28,7 +28,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 7c2f38f68ebb..e3558b9a58ba 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp, */ asmlinkage -long xtensa_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, +long xtensa_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, long a3, long a4, long a5, struct pt_regs *regs) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..05c7d6b84df7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. 
*/ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c809e286d213..a065612fc928 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -50,8 +50,8 @@ struct linux_binprm{ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - char * filename; /* Name of binary as seen by procps */ - char * interp; /* Name of the binary really executed. Most + const char * filename; /* Name of binary as seen by procps */ + const char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; @@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); -extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); diff --git a/include/linux/sched.h b/include/linux/sched.h index ce160d68f5e7..1e2a6db2d7dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2109,7 +2109,9 @@ extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); +extern int do_execve(const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6e5d19788634..e6319d18a55d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); asmlinkage long sys_perf_event_open( diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b108538d0d9..3098a38f3ae1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 22d61cb06f98..94ab488039aa 100644 --- a/init/main.c +++ b/init/main.c @@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { 
"init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); diff --git a/kernel/kmod.c b/kernel/kmod.c index 6e9b19667a8d..9cd0591c96a2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data) goto fail; } - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); /* Exec failed? */ fail: diff --git a/security/commoncap.c b/security/commoncap.c index 4e015996dd4d..9d172e6e330c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -40,7 +40,7 @@ * * Warn if that happens, once per boot. */ -static void warn_setuid_and_fcaps_mixed(char *fname) +static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { -- cgit v1.2.3 From 5c79a5ae23e72fa12f1c7c528f62bf3ea35da0dc Mon Sep 17 00:00:00 2001 From: Ernst Schwab Date: Mon, 16 Aug 2010 15:10:11 +0200 Subject: spi.h: missing kernel-doc notation, please fix Added comments in kernel-doc notation for previously added struct fields. Signed-off-by: Ernst Schwab Acked-by: Randy Dunlap Signed-off-by: Grant Likely --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ae0a5286f558..92e52a1e6af3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @flags: other constraints relevant to this driver + * @bus_lock_spinlock: spinlock for SPI bus locking + * @bus_lock_mutex: mutex for SPI bus locking + * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. -- cgit v1.2.3 From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 ++++++++-------- fs/jbd/commit.c | 49 +++++++++++++++++++++++---------------------- fs/jbd2/commit.c | 39 ++++++++++++++---------------------- fs/nilfs2/super.c | 28 +++++++++++++------------- include/linux/buffer_head.h | 3 +-- 5 files changed, 63 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..6c8ad977f3d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); - /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - /* * Only clear out a write error when rewriting */ @@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? 
If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..68345430fb48 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. 
" - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -- cgit v1.2.3 From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++++++-------------------- fs/fat/misc.c | 4 +++- fs/jbd/checkpoint.c | 4 +++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 +++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 +++++++-------------- fs/ufs/ialloc.c | 18 ++++++---------- fs/ufs/truncate.c | 18 ++++++---------- fs/ufs/util.c | 20 +++++++---------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 -------- 16 files changed, 73 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. 
+ * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. 
The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git 
a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 
9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 3d529946ce292336793b85198bd59afc75e16bd4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 18 Aug 2010 09:48:31 +1000 Subject: Fix spelling mistake in jhash Fix a spelling mistake. Signed-off-by: Anton Blanchard Signed-off-by: Jiri Kosina --- include/linux/jhash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 2a2f99fbcb16..ced1159fa4f2 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -116,7 +116,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) /* A special ultra-optimized versions that knows they are hashing exactly * 3, 2 or 1 word(s). * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally * done at the end is not done here. */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -- cgit v1.2.3 From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:33 +1000 Subject: fs: fs_struct rwlock to spinlock fs: fs_struct rwlock to spinlock struct fs_struct.lock is an rwlock with the read-side used to protect root and pwd members while taking references to them. Taking a reference to a path typically requires just 2 atomic ops, so the critical section is very small. Parallel read-side operations would have cacheline contention on the lock, the dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a real parallelism increase. Replace it with a spinlock to avoid one or two atomic operations in typical path lookup fastpath. 
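A sketch of the converted reader fastpath, mirroring the get_fs_root() hunk below: the critical section is just a struct copy plus two reference bumps, which is why the rwlock's extra read-side atomics buy no real parallelism:

	static inline void get_fs_root(struct fs_struct *fs, struct path *root)
	{
		spin_lock(&fs->lock);	/* was read_lock(&fs->lock) */
		*root = fs->root;	/* tiny critical section: one struct copy */
		path_get(root);		/* grabs dentry + vfsmount references */
		spin_unlock(&fs->lock);	/* was read_unlock(&fs->lock) */
	}
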
Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/staging/pohmelfs/path_entry.c | 8 ++++---- fs/exec.c | 4 ++-- fs/fs_struct.c | 32 ++++++++++++++++---------------- include/linux/fs_struct.h | 14 +++++++------- kernel/fork.c | 10 +++++----- 5 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd50d638..8ec83d2dffb7 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); spin_lock(&dcache_lock); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..5adab2c93eca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - 
write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { -- cgit v1.2.3 From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:35 +1000 Subject: fs: cleanup files_lock locking fs: cleanup files_lock locking Lock tty_files with a new spinlock, tty_files_lock; provide helpers to manipulate the per-sb files list; unexport the files_lock spinlock. 
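A sketch of the resulting tty open path, distilled from the ptmx_open() hunk below; file_sb_list_del() and tty_files_lock are the helper and lock this patch introduces:

	filp->private_data = tty;
	file_sb_list_del(filp);	/* __dentry_open() put it on sb->s_files */
	spin_lock(&tty_files_lock);
	list_add(&filp->f_u.fu_list, &tty->tty_files);
	spin_unlock(&tty_files_lock);
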
Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +++++- drivers/char/tty_io.c | 26 ++++++++++++++++++-------- fs/file_table.c | 42 ++++++++++++++++++------------------------ fs/open.c | 4 ++-- include/linux/fs.h | 7 ++----- include/linux/tty.h | 1 + security/selinux/hooks.c | 4 ++-- 7 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae1f9bb..2c64faa8efa4 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, &tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42375a2..cd5b829634ea 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. 
*/ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1840,7 +1846,11 @@ got_driver: } filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..6f0e62ecfddd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +248,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spin_lock(&files_lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, &sb->s_files); + spin_unlock(&files_lock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spin_lock(&files_lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(&files_lock); } } @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb) struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb) if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); + spin_unlock(&files_lock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + spin_unlock(&files_lock); return 0; } @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -408,16 +405,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. 
*/ + spin_unlock(&files_lock); mnt_drop_write(mnt); mntput(mnt); goto retry; } - file_list_unlock(); + spin_unlock(&files_lock); } void __init files_init(unsigned long mempages) diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/include/linux/fs.h b/include/linux/fs.h index 29f7c975304c..5a9a9e5a3705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,9 +944,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..f6b371a2514e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..bd7da0f0ccf3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ -- cgit v1.2.3 From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:36 +1000 Subject: tty: fix fu_list abuse tty: fix fu_list abuse tty code abuses fu_list, which causes a bug in remount,ro handling. If a tty device node is opened on a filesystem, then the last link to the inode removed, the filesystem will be allowed to be remounted readonly. This is because fs_may_remount_ro does not find the 0 link tty inode on the file sb list (because the tty code incorrectly removed it to use for its own purpose). This can result in a filesystem with errors after it is marked "clean". 
Taking idea from Christoph's initial patch, allocate a tty private struct at file->private_data and put our required list fields in there, linking file and tty. This makes tty nodes behave the same way as other device nodes and avoid meddling with the vfs, and avoids this bug. The error handling is not trivial in the tty code, so for this bugfix, I take the simple approach of using __GFP_NOFAIL and don't worry about memory errors. This is not a problem because our allocator doesn't fail small allocs as a rule anyway. So proper error handling is left as an exercise for tty hackers. [ Arguably filesystem's device inode would ideally be divorced from the driver's pseudo inode when it is opened, but in practice it's not clear whether that will ever be worth implementing. ] Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +--- drivers/char/tty_io.c | 84 +++++++++++++++++++++++++++++++----------------- fs/internal.h | 2 ++ include/linux/fs.h | 2 -- include/linux/tty.h | 8 +++++ security/selinux/hooks.c | 5 ++- 6 files changed, 69 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 2c64faa8efa4..c350d01716bd 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index cd5b829634ea..949067a0bd47 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX 
sockets ? */ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. 
@@ -1845,12 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1926,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..6a5c13a80660 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a9a9e5a3705..5e65add0f163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index f6b371a2514e..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 
0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bd7da0f0ccf3..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (tty) { spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { -- cgit v1.2.3 From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:37 +1000 Subject: lglock: introduce special lglock and brlock spin locks lglock: introduce special lglock and brlock spin locks This patch introduces "local-global" locks (lglocks). These can be used to: - Provide fast exclusive access to per-CPU data, with exclusive access to another CPU's data allowed but possibly subject to contention, and to provide very slow exclusive access to all per-CPU data. - Or to provide very fast and scalable read serialisation, and to provide very slow exclusive serialisation of data (not necessarily per-CPU data). Brlocks are also implemented as a short-hand notation for the latter use case. Thanks to Paul for local/global naming convention. Cc: linux-kernel@vger.kernel.org Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 include/linux/lglock.h (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,172 @@ +/* + * Specialised local-global spinlock. Can only be declared as global variables + * to avoid overhead and keep things simple (and we don't want to start using + * these inside dynamically allocated structures). + * + * "local/global locks" (lglocks) can be used to: + * + * - Provide fast exclusive access to per-CPU data, with exclusive access to + * another CPU's data allowed but possibly subject to contention, and to + * provide very slow exclusive access to all per-CPU data. + * - Or to provide very fast and scalable read serialisation, and to provide + * very slow exclusive serialisation of data (not necessarily per-CPU data). + * + * Brlocks are also implemented as a short-hand notation for the latter use + * case. + * + * Copyright 2009, 2010, Nick Piggin, Novell Inc. 
+ */ +#ifndef __LINUX_LGLOCK_H +#define __LINUX_LGLOCK_H + +#include +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + 
lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif -- cgit v1.2.3 From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:38 +1000 Subject: fs: scale files_lock fs: scale files_lock Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow). One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variable in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from a different CPU's list. However, loads with frequent removal of files imply a short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1. A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before. When more than one CPU is allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case. Testing results: On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it.

Booting:    locks= 25049  cpu-hits= 23174 (92.5%)  node-hits= 23945 (95.6%)
kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%)
dbench 64   locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%)

So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time. Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile.

            throughput
2.6.34-rc2  24.5
+patch      24.9

            us     sys    idle   IO wait (in %)
2.6.34-rc2  51.25  28.25  17.25  3.25
+patch      53.75  18.5   19     8.75

So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput. Single threaded performance difference was within the noise of microbenchmarks. That is not to say a penalty does not exist: the code is larger and requires more memory accesses, so it will be slightly slower.
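[Editor's illustration, not part of the patch: a pthreads sketch of the per-CPU list idea above. Adding to the local list takes a single lock; a global snapshot takes them all. NLISTS stands in for the CPU count, and all names are invented for the demo.]

#include <pthread.h>
#include <stdio.h>

#define NLISTS 4                        /* stand-in for nr_cpu_ids */

struct node { int val; struct node *next; };

static pthread_spinlock_t locks[NLISTS];
static struct node *lists[NLISTS];

static void add_local(int cpu, struct node *n)
{
	pthread_spin_lock(&locks[cpu]); /* fast path: one per-CPU lock */
	n->next = lists[cpu];
	lists[cpu] = n;
	pthread_spin_unlock(&locks[cpu]);
}

static int scan_all(void)               /* slow path: "global" lock */
{
	int i, count = 0;

	for (i = 0; i < NLISTS; i++)
		pthread_spin_lock(&locks[i]);
	for (i = 0; i < NLISTS; i++)
		for (struct node *n = lists[i]; n; n = n->next)
			count++;
	for (i = NLISTS - 1; i >= 0; i--)
		pthread_spin_unlock(&locks[i]);
	return count;
}

int main(void)
{
	static struct node a = { .val = 1 }, b = { .val = 2 };

	for (int i = 0; i < NLISTS; i++)
		pthread_spin_init(&locks[i], PTHREAD_PROCESS_PRIVATE);
	add_local(0, &a);
	add_local(3, &b);
	printf("files on all lists: %d\n", scan_all());
	return 0;
}

Removal works like add but may take a remote list's lock, which is why the patch records f_sb_list_cpu in struct file.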
Cc: linux-kernel@vger.kernel.org Cc: Tim Chen Cc: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--------- fs/super.c | 18 +++++++++ include/linux/fs.h | 7 ++++ 3 files changed, 115 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 6f0e62ecfddd..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,7 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -336,30 +339,98 @@ void put_filp(struct file *file) } } +static inline int file_list_cpu(struct file *file) +{ +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ void file_sb_list_add(struct file *file, struct super_block *sb) { - spin_lock(&files_lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); } +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? 
*/ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); return 0; } @@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -406,12 +477,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e65add0f163..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -920,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1334,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- cgit v1.2.3 From 56385a12d9bb9e173751f74b6c430742018cafc0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:08:17 +0200 Subject: ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay parameter) With some hardware combinations, the PCM interrupts from the emu10k1 chip are acknowledged before the period boundary. The midlevel PCM code gets confused and the playback stream is interrupted. It seems that shifting the interrupt processing by 2 samples is enough to fix this issue. This default value does not harm other, non-affected hardware.
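[Editor's illustration, not part of the patch: a rough arithmetic sketch of the workaround, with made-up numbers. If the chip raises the period IRQ a couple of samples early, a naive period count at IRQ time is off by one; programming the IRQ point delay_pcm_irq samples past the boundary hides the early acknowledgement.]

#include <stdio.h>

static unsigned int periods_elapsed(unsigned int hw_ptr,
				    unsigned int period_size)
{
	return hw_ptr / period_size;
}

int main(void)
{
	unsigned int period_size = 1024;   /* frames per period */
	unsigned int delay = 2;            /* pcm_irq_delay */

	/* IRQ acked 1 frame early: mid-level sees 0 complete periods */
	printf("early irq:   %u periods\n",
	       periods_elapsed(period_size - 1, period_size));
	/* IRQ point shifted by 'delay': still acked 1 frame early,
	   but the pointer is now past the period boundary */
	printf("delayed irq: %u periods\n",
	       periods_elapsed(period_size + delay - 1, period_size));
	return 0;
}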
More information: Kernel bugzilla bug#16300 [A compile warning fixed by tiwai] Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 1 + sound/core/pcm_native.c | 4 ++++ sound/pci/emu10k1/emu10k1.c | 4 ++++ sound/pci/emu10k1/emupcm.c | 30 ++++++++++++++++++++++++++---- sound/pci/emu10k1/memory.c | 4 +++- 5 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 6a664c3f7c1e..7dc97d12253c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1707,6 +1707,7 @@ struct snd_emu10k1 { unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ unsigned long dma_mask; /* PCI DMA mask */ + unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ struct snd_dma_buffer silent_page; /* silent page */ struct snd_dma_buffer ptb_pages; /* page table pages */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a3b2a6479246..134fc6c2e08d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 4203782d7cb7..aff8387c45cf 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64}; static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ...
(SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444); MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 55b83ef73c63..622bace148e3 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr + (extra ? 
emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to sychronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream * #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ffb1ddb8dc28..957a311514c8 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; -- cgit v1.2.3 From bd76af0f87f7a1815b311bde269a3f18305b3169 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:16:54 +0200 Subject: ALSA: pcm midlevel code - add time check for double interrupt acknowledge The current code in pcm_lib.c does all checks using only the position in the ring buffer. Unfortunately, when the interrupts get delayed or merged into one, we need another timing source to check when the buffer boundary has been crossed, to avoid wrongly updating the ring buffer pointers. This code uses jiffies to check the right time window without any performance impact.
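[Editor's illustration, not part of the patch: a simplified userspace rendering of the new check in snd_pcm_update_hw_ptr0(). A pointer that appears to have gone backwards is only treated as a full buffer wrap if at least half a buffer's worth of wall time has passed; otherwise it was the same interrupt acknowledged twice. Types and numbers are invented.]

#include <stdio.h>

struct rt {
	unsigned long hw_base, buffer_size, period_size;
	unsigned long hw_ptr_interrupt;       /* position at last irq */
	unsigned long hw_ptr_jiffies;         /* time of last update */
	unsigned long hw_ptr_buffer_jiffies;  /* buffer length in jiffies */
};

static unsigned long update_hw_ptr(struct rt *rt, unsigned long pos,
				   unsigned long now)
{
	unsigned long new_hw_ptr = rt->hw_base + pos;
	unsigned long expected = rt->hw_ptr_interrupt + rt->period_size;

	if (expected > new_hw_ptr) {
		/* double-acked irq, or a real wrap? ask the clock */
		unsigned long hdelta = now - rt->hw_ptr_jiffies;

		if (hdelta > rt->hw_ptr_buffer_jiffies / 2) {
			rt->hw_base += rt->buffer_size;   /* real wrap */
			new_hw_ptr = rt->hw_base + pos;
		}
	}
	return new_hw_ptr;
}

int main(void)
{
	struct rt rt = {
		.buffer_size = 4096, .period_size = 1024,
		.hw_ptr_interrupt = 3072, .hw_ptr_jiffies = 1000,
		.hw_ptr_buffer_jiffies = 85, /* ~4096 frames at 48 kHz, HZ=1000 */
	};

	/* same position reported 1 jiffy later: a doubled irq, no wrap */
	printf("%lu\n", update_hw_ptr(&rt, 3072, 1001));
	/* low position reported 60 jiffies later: a genuine wrap */
	printf("%lu\n", update_hw_ptr(&rt, 100, 1060));
	return 0;
}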
Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + sound/core/pcm_lib.c | 14 +++++++++----- sound/core/pcm_native.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 85f1c6bf8566..dfd9b76b1853 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -278,6 +278,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ + unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */ snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ /* -- HW params -- */ diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index e23e0e7ab26f..a1707cca9c66 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -334,11 +334,15 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, /* delta = "expected next hw_ptr" for in_interrupt != 0 */ delta = runtime->hw_ptr_interrupt + runtime->period_size; if (delta > new_hw_ptr) { - hw_base += runtime->buffer_size; - if (hw_base >= runtime->boundary) - hw_base = 0; - new_hw_ptr = hw_base + pos; - goto __delta; + /* check for double acknowledged interrupts */ + hdelta = jiffies - runtime->hw_ptr_jiffies; + if (hdelta > runtime->hw_ptr_buffer_jiffies/2) { + hw_base += runtime->buffer_size; + if (hw_base >= runtime->boundary) + hw_base = 0; + new_hw_ptr = hw_base + pos; + goto __delta; + } } } /* new_hw_ptr might be lower than old_hw_ptr in case when */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 134fc6c2e08d..e2e73895db12 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -864,6 +864,8 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state) struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->hw_ptr_jiffies = jiffies; + runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / + runtime->rate; runtime->status->state = state; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) -- cgit v1.2.3 From d15ca3203754359cfe5d18910722d3089b204cc4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Aug 2010 18:55:33 +0100 Subject: Fix the declaration of sys_execve() in asm-generic/syscalls.h Fix the declaration of sys_execve() in asm-generic/syscalls.h to have various consts applied to its pointers. 
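[Editor's illustration, not part of the patch: a userspace sketch of what the added qualifiers promise, namely that the callee may not modify the argv/envp vectors or the strings they point to. my_execve() is a hypothetical stand-in, not the real syscall.]

#include <stdio.h>

static long my_execve(const char *filename,
		      const char *const *argv,
		      const char *const *envp)
{
	/* argv[0] = "oops";   would not compile: the vector is const */
	/* argv[0][0] = 'O';   would not compile: the strings are const */
	printf("exec %s with env %s\n", filename, envp[0]);
	return 0;
}

int main(void)
{
	const char *argv[] = { "/bin/true", NULL };
	const char *envp[] = { "HOME=/", NULL };

	return (int)my_execve(argv[0], argv, envp);
}

The extra const costs nothing at runtime; it only lets the compiler reject accidental writes through the pointers.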
Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index df84e3b04555..d89dec864d42 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs); #endif #ifndef sys_execve -asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +asmlinkage long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); #endif #ifndef sys_mmap2 -- cgit v1.2.3 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every arch, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 15 ++++---------- arch/powerpc/kernel/perf_callchain.c | 40 +++++++++++++----------------------- arch/sh/kernel/perf_callchain.c | 11 +++------- arch/sparc/kernel/perf_event.c | 26 +++++++++-------------- arch/x86/kernel/cpu/perf_event.c | 20 +++++++----------- include/linux/perf_event.h | 7 +++++++ 6 files changed, 45 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index fdcb0be47df1..a07c3b1955f0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3001,13 +3001,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code.
*/ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3032,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3057,7 +3050,7 @@ perf_callchain_user(struct pt_regs *regs, { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, PERF_CONTEXT_USER); if (!user_mode(regs)) regs = task_pt_regs(current); @@ -3078,7 +3071,7 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } @@ -3088,7 +3081,7 @@ perf_callchain_kernel(struct pt_regs *regs, { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..a286c2e5a3ea 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? 
@@ -69,8 +57,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +77,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +99,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -246,8 +234,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +264,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -447,8 +435,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,14 +458,14 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 1d6dbce7a3bc..00143f3dd196 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include #include -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -52,8 +47,8 @@ static const struct stacktrace_ops callchain_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, 
&callchain_ops, entry); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 357ced3c33ff..2a95a9079862 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,12 +1283,6 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1297,8 +1291,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1322,13 +1316,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1341,8 +1335,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1355,7 +1349,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } @@ -1364,8 +1358,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1378,7 +1372,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a4d191f9492..8af28caeafc1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,12 +1571,6 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); @@ -1602,7 +1596,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1616,8 +1610,8 @@ static const struct stacktrace_ops backtrace_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct 
perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1646,7 +1640,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1670,8 +1664,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1688,7 +1682,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 937495c25073..358880404b42 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,6 +978,13 @@ extern void perf_event_fork(struct task_struct *tsk); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -- cgit v1.2.3 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86, which needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoids all the user_mode() and current->mm checks, and so on. - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src).
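[Editor's illustration, not part of the patch: a single-file sketch of the __weak pattern used here, for GCC or Clang. The generic code ships an empty weak hook; an architecture that implements unwinding provides a strong definition in another object file, which wins at link time. The names below are invented.]

#include <stdio.h>

struct cc_entry { unsigned long ip[8]; int nr; };

/* generic fallback: an arch without unwind support contributes nothing */
__attribute__((weak))
void cc_kernel(struct cc_entry *entry, const void *regs)
{
	(void)entry;
	(void)regs;
}

int main(void)
{
	struct cc_entry e = { .nr = 0 };

	/* note the (dst, src) argument order adopted by the patch */
	cc_kernel(&e, NULL);
	printf("frames collected: %d\n", e.nr);
	return 0;
}

Linking in a second object file that defines a non-weak cc_kernel() would silently replace the fallback, which is how perf_callchain_kernel()/perf_callchain_user() get their per-arch bodies.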
Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 43 +++---------------------------- arch/powerpc/kernel/perf_callchain.c | 49 +++++++++++------------------------- arch/sh/kernel/perf_callchain.c | 37 ++------------------------- arch/sparc/kernel/perf_event.c | 46 +++++++++++---------------------- arch/x86/kernel/cpu/perf_event.c | 45 ++++++--------------------------- include/linux/perf_event.h | 10 +++++++- kernel/perf_event.c | 40 +++++++++++++++++++++++++++-- 7 files changed, 90 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a07c3b1955f0..0e3bbdb15927 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; perf_callchain_store(entry, PERF_CONTEXT_USER); - if (!user_mode(regs)) - regs = task_pt_regs(current); - tail = (struct frame_tail *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) @@ -3075,9 +3071,8 @@ callchain_trace(struct stackframe *fr, return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; @@ -3088,33 +3083,3 @@ perf_callchain_kernel(struct pt_regs *regs, fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a286c2e5a3ea..f7a85ede8407 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static 
inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs, } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 00143f3dd196..ef076a91292a 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -44,44 +44,11 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. 
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2a95a9079862..460162d74aba 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,14 +1283,16 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif + stack_trace_flush(); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->tpc); @@ -1330,8 +1332,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1353,8 +1355,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1376,30 +1378,12 @@ static void perf_callchain_user_32(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. 
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8af28caeafc1..39f8421b86e6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,9 +1571,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi); static void @@ -1607,8 +1605,8 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->ip); @@ -1653,14 +1651,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); fp = (void __user *)regs->bp; @@ -1687,42 +1683,17 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +struct perf_callchain_entry *perf_callchain_buffer(void) { - struct perf_callchain_entry *entry; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return NULL; } if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); + return &__get_cpu_var(perf_callchain_entry_nmi); - return entry; + return &__get_cpu_var(perf_callchain_entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 358880404b42..4db61dded388 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -976,7 +976,15 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct 
perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern struct perf_callchain_entry *perf_callchain_buffer(void); + static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c772a3d4000d..02efde6c8798 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2937,13 +2937,49 @@ void perf_event_do_pending(void) __perf_pending_run(); } +DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + /* * Callchain support -- arch specific */ -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +__weak struct perf_callchain_entry *perf_callchain_buffer(void) { - return NULL; + return &__get_cpu_var(perf_callchain_entry); +} + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + entry = perf_callchain_buffer(); + if (!entry) + return NULL; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) + perf_callchain_user(entry, regs); + + return entry; } -- cgit v1.2.3 From 927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 16:20:36 +0200 Subject: perf: Fix race in callchains Now that software events no longer run with interrupts disabled in the event path, callchains can nest on any context. So separating NMI and other contexts in two buffers has become racy. Fix this by providing one buffer per nesting level. Given the size of the callchain entries (2040 bytes * 4), we now need to allocate them dynamically. v2: Fixed put_callchain_entry call after recursion. Fix the type of the recursion, it must be an array. v3: Use a manual per-cpu allocation (temporary solution until NMIs can safely access vmalloc'ed memory). Do a better separation between callchain reference tracking and allocation. Make the "put" path lockless for non-release cases. v4: Protect the callchain buffers with rcu. v5: Make the per-cpu buffer allocations node-affine.
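A minimal user-space sketch of the scheme (illustrative names only, not the kernel implementation): one buffer per nesting level, with a per-level recursion flag that rejects reentry on the same level.

#include <stdio.h>

#define NR_LEVELS 4	/* task, softirq, hardirq, nmi */

static int recursion[NR_LEVELS];
static char buffers[NR_LEVELS][2048];

static int get_level(int level)
{
	if (recursion[level])
		return -1;	/* this level is already in use: nested event */
	recursion[level]++;
	return level;
}

static void put_level(int level)
{
	recursion[level]--;
}

int main(void)
{
	int l = get_level(2);	/* pretend we are in hardirq context */

	if (l >= 0) {
		buffers[l][0] = '\0';	/* the callchain would be stored here */
		printf("using buffer for level %d\n", l);
		put_level(l);
	}
	return 0;
}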
Signed-off-by: Frederic Weisbecker Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Paul Mundt Cc: David Miller Cc: Borislav Petkov --- arch/x86/kernel/cpu/perf_event.c | 22 ++- include/linux/perf_event.h | 1 - kernel/perf_event.c | 298 ++++++++++++++++++++++++++++++--------- 3 files changed, 238 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a3c922288cc0..8e91cf34a9c8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1608,6 +1608,11 @@ static const struct stacktrace_ops backtrace_ops = { void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); @@ -1656,6 +1661,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) struct stack_frame frame; const void __user *fp; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; @@ -1681,19 +1690,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } } -struct perf_callchain_entry *perf_callchain_buffer(void) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - return &__get_cpu_var(perf_callchain_entry_nmi); - - return &__get_cpu_var(perf_callchain_entry); -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4db61dded388..d7e8ea690864 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -983,7 +983,6 @@ extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern struct perf_callchain_entry *perf_callchain_buffer(void); static inline void diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 615d024894cf..75ab8a2df6b2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1763,6 +1763,216 @@ static u64 perf_event_read(struct perf_event *event) return perf_event_count(event); } +/* + * Callchain support + */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[4]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; +
rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. + */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * 4; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + +static void +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + /* * Initialize the perf_event context in a task_struct: */ @@ -1895,6 +2105,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -2937,55 +3149,6 @@ void 
perf_event_do_pending(void) __perf_pending_run(); } -DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain_buffer(void) -{ - return &__get_cpu_var(perf_callchain_entry); -} - -__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -__weak void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - entry = perf_callchain_buffer(); - if (!entry) - return NULL; - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); - perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); - } - - return entry; -} - - /* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is @@ -3480,14 +3643,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -4243,32 +4412,16 @@ end: int perf_swevent_get_recursion_context(void) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(cpuctx->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + + put_recursion_context(cpuctx->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4968,6 +5121,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; -- cgit v1.2.3 From 7ae07ea3a48d30689ee037cb136bc21f0b37d8ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 14 Aug 2010 20:45:13 +0200 Subject: perf: Humanize the number of contexts Instead of hardcoding the number of contexts for the recursion barriers, define a cpp constant to make the code more self-explanatory.
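For illustration only (standard C sketch, not part of the patch), the named constant documents at every use site what a bare "4" leaves implicit:

#include <stdio.h>

#define PERF_NR_CONTEXTS 4	/* task, softirq, hardirq, nmi */

static int recursion[PERF_NR_CONTEXTS];	/* was: int recursion[4]; */

int main(void)
{
	int i;

	/* every loop bound now names its meaning */
	for (i = 0; i < PERF_NR_CONTEXTS; i++)
		recursion[i] = 0;
	printf("tracking %d contexts\n", PERF_NR_CONTEXTS);
	return 0;
}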
Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian --- include/linux/perf_event.h | 14 ++++++++------ kernel/perf_event.c | 4 ++-- kernel/trace/trace_event_perf.c | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d7e8ea690864..ae6fa6050925 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 75ab8a2df6b2..f416aef242c3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1772,7 +1772,7 @@ struct callchain_cpus_entries { struct perf_callchain_entry *cpu_entries[0]; }; -static DEFINE_PER_CPU(int, callchain_recursion[4]); +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static atomic_t nr_callchain_events; static DEFINE_MUTEX(callchain_mutex); struct callchain_cpus_entries *callchain_cpus_entries; @@ -1828,7 +1828,7 @@ static int alloc_callchain_buffers(void) if (!entries) return -ENOMEM; - size = sizeof(struct perf_callchain_entry) * 4; + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; for_each_possible_cpu(cpu) { entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 000e6e85b445..db2eae2efcf2 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[4]; +static char *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -45,7 +45,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, char *buf; int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -140,7 +140,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } -- cgit v1.2.3 From 6016ee13db518ab1cd0cbf43fc2ad5712021e338 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 12:47:59 +0900 Subject: perf, tracing: add missing __percpu markups ftrace_event_call->perf_events, perf_trace_buf, fgraph_data->cpu_data and some local variables are percpu pointers missing __percpu markups. Add them. 
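A sketch of what the annotation buys (kernel context assumed; example_buf and example_init are made-up names, not part of the patch): sparse can then warn when a __percpu pointer is dereferenced without going through the per-cpu accessors.

#include <linux/errno.h>
#include <linux/percpu.h>

static char __percpu *example_buf;

static int example_init(void)
{
	char *p;

	example_buf = alloc_percpu(char);
	if (!example_buf)
		return -ENOMEM;

	p = this_cpu_ptr(example_buf);	/* ok: converted via accessor */
	*p = 0;				/* plain pointer from here on */

	/* *example_buf = 0; -- sparse would warn: wrong address space */

	free_percpu(example_buf);
	return 0;
}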
Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 4 ++-- kernel/trace/trace_event_perf.c | 15 ++++++++------- kernel/trace/trace_functions_graph.c | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..5f8ad7bec636 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index db2eae2efcf2..92f5477a006a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[PERF_NR_CONTEXTS]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; for (i = 0; i < PERF_NR_CONTEXTS; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -102,13 +102,14 @@ int perf_trace_init(struct perf_event *p_event) int perf_trace_enable(struct perf_event *p_event) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6bff23625781..fcb5a542cd21 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -23,7 +23,7 @@ struct fgraph_cpu_data { }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; -- cgit v1.2.3 From a49f37eed22b74221f271811ea41323654e40dad Mon Sep 17 00:00:00 2001 From: Sachin Sanap Date: Fri, 13 Aug 2010 21:22:49 +0000 Subject: net: add Fast Ethernet driver for PXA168. Signed-off-by: Sachin Sanap Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 10 + drivers/net/Makefile | 1 + drivers/net/pxa168_eth.c | 1666 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pxa168_eth.h | 30 + 4 files changed, 1707 insertions(+) create mode 100644 drivers/net/pxa168_eth.c create mode 100644 include/linux/pxa168_eth.h (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ebe68395ecf8..fe581566cb26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -928,6 +928,16 @@ config SMC91X The module will be called smc91x. If you want to compile it as a module, say M here and read . +config PXA168_ETH + tristate "Marvell pxa168 ethernet support" + depends on CPU_PXA168 + select PHYLIB + help + This driver supports the pxa168 Ethernet ports. + + To compile this driver as a module, choose M here. The module + will be called pxa168_eth. + config NET_NETX tristate "NetX Ethernet support" select MII diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..3e8f150c4b14 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -244,6 +244,7 @@ obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o obj-$(CONFIG_SMSC911X) += smsc911x.o +obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c new file mode 100644 index 000000000000..ecc64d750cce --- /dev/null +++ b/drivers/net/pxa168_eth.c @@ -0,0 +1,1666 @@ +/* + * PXA168 ethernet driver. + * Most of the code is derived from mv643xx ethernet driver. + * + * Copyright (C) 2010 Marvell International Ltd. + * Sachin Sanap + * Philip Rakity + * Mark Brown + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "pxa168-eth" +#define DRIVER_VERSION "0.3" + +/* + * Registers + */ + +#define PHY_ADDRESS 0x0000 +#define SMI 0x0010 +#define PORT_CONFIG 0x0400 +#define PORT_CONFIG_EXT 0x0408 +#define PORT_COMMAND 0x0410 +#define PORT_STATUS 0x0418 +#define HTPR 0x0428 +#define SDMA_CONFIG 0x0440 +#define SDMA_CMD 0x0448 +#define INT_CAUSE 0x0450 +#define INT_W_CLEAR 0x0454 +#define INT_MASK 0x0458 +#define ETH_F_RX_DESC_0 0x0480 +#define ETH_C_RX_DESC_0 0x04A0 +#define ETH_C_TX_DESC_1 0x04E4 + +/* smi register */ +#define SMI_BUSY (1 << 28) /* 0 - Write, 1 - Read */ +#define SMI_R_VALID (1 << 27) /* 0 - Write, 1 - Read */ +#define SMI_OP_W (0 << 26) /* Write operation */ +#define SMI_OP_R (1 << 26) /* Read operation */ + +#define PHY_WAIT_ITERATIONS 10 + +#define PXA168_ETH_PHY_ADDR_DEFAULT 0 +/* RX & TX descriptor command */ +#define BUF_OWNED_BY_DMA (1 << 31) + +/* RX descriptor status */ +#define RX_EN_INT (1 << 23) +#define RX_FIRST_DESC (1 << 17) +#define RX_LAST_DESC (1 << 16) +#define RX_ERROR (1 << 15) + +/* TX descriptor command */ +#define TX_EN_INT (1 << 23) +#define TX_GEN_CRC (1 << 22) +#define TX_ZERO_PADDING (1 << 18) +#define TX_FIRST_DESC (1 << 17) +#define TX_LAST_DESC (1 << 16) +#define TX_ERROR (1 << 15) + +/* SDMA_CMD */ +#define SDMA_CMD_AT (1 << 31) +#define SDMA_CMD_TXDL (1 << 24) +#define SDMA_CMD_TXDH (1 << 23) +#define SDMA_CMD_AR (1 << 15) +#define SDMA_CMD_ERD (1 << 7) + +/* Bit definitions of the Port Config Reg */ +#define PCR_HS (1 << 12) +#define PCR_EN (1 << 7) +#define PCR_PM (1 << 0) + +/* Bit definitions of the Port Config Extend Reg */ +#define PCXR_2BSM (1 << 28) +#define PCXR_DSCP_EN (1 << 21) +#define PCXR_MFL_1518 (0 << 14) +#define PCXR_MFL_1536 (1 << 14) +#define PCXR_MFL_2048 (2 << 14) +#define PCXR_MFL_64K (3 << 14) +#define PCXR_FLP (1 << 11) +#define PCXR_PRIO_TX_OFF 3 +#define PCXR_TX_HIGH_PRI (7 << PCXR_PRIO_TX_OFF) + +/* Bit definitions of the SDMA Config Reg */ +#define SDCR_BSZ_OFF 12 +#define SDCR_BSZ8 (3 << SDCR_BSZ_OFF) +#define SDCR_BSZ4 (2 << SDCR_BSZ_OFF) +#define SDCR_BSZ2 (1 << SDCR_BSZ_OFF) +#define SDCR_BSZ1 (0 << SDCR_BSZ_OFF) +#define SDCR_BLMR (1 << 6) +#define SDCR_BLMT (1 << 7) +#define SDCR_RIFB (1 << 9) +#define SDCR_RC_OFF 2 +#define SDCR_RC_MAX_RETRANS (0xf << SDCR_RC_OFF) + +/* + * Bit definitions of the Interrupt Cause Reg + * and Interrupt MASK Reg is the same + */ +#define ICR_RXBUF (1 << 0) +#define ICR_TXBUF_H (1 << 2) +#define ICR_TXBUF_L (1 << 3) +#define ICR_TXEND_H (1 << 6) +#define ICR_TXEND_L (1 << 7) +#define ICR_RXERR (1 << 8) +#define ICR_TXERR_H (1 << 10) +#define ICR_TXERR_L (1 << 11) +#define ICR_TX_UDR (1 << 13) +#define ICR_MII_CH (1 << 28) + +#define ALL_INTS (ICR_TXBUF_H | ICR_TXBUF_L | ICR_TX_UDR |\ + ICR_TXERR_H | ICR_TXERR_L |\ + ICR_TXEND_H | ICR_TXEND_L |\ + ICR_RXBUF | ICR_RXERR | ICR_MII_CH) + +#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ + +#define NUM_RX_DESCS 64 +#define NUM_TX_DESCS 64 + +#define HASH_ADD 0 +#define HASH_DELETE 1 +#define HASH_ADDR_TABLE_SIZE 0x4000 /* 16K (1/2K address - PCR_HS == 1) */ +#define HOP_NUMBER 12 + +/* Bit definitions for Port status */ +#define PORT_SPEED_100 (1 << 0) +#define FULL_DUPLEX (1 << 1) +#define FLOW_CONTROL_ENABLED (1 << 2) +#define LINK_UP (1 << 3) + +/* Bit definitions for work to 
be done */ +#define WORK_LINK (1 << 0) +#define WORK_TX_DONE (1 << 1) + +/* + * Misc definitions. + */ +#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES) + +struct rx_desc { + u32 cmd_sts; /* Descriptor command status */ + u16 byte_cnt; /* Descriptor buffer byte count */ + u16 buf_size; /* Buffer size */ + u32 buf_ptr; /* Descriptor buffer pointer */ + u32 next_desc_ptr; /* Next descriptor pointer */ +}; + +struct tx_desc { + u32 cmd_sts; /* Command/status field */ + u16 reserved; + u16 byte_cnt; /* buffer byte count */ + u32 buf_ptr; /* pointer to buffer for this descriptor */ + u32 next_desc_ptr; /* Pointer to next descriptor */ +}; + +struct pxa168_eth_private { + int port_num; /* User Ethernet port number */ + + int rx_resource_err; /* Rx ring resource error flag */ + + /* Next available and first returning Rx resource */ + int rx_curr_desc_q, rx_used_desc_q; + + /* Next available and first returning Tx resource */ + int tx_curr_desc_q, tx_used_desc_q; + + struct rx_desc *p_rx_desc_area; + dma_addr_t rx_desc_dma; + int rx_desc_area_size; + struct sk_buff **rx_skb; + + struct tx_desc *p_tx_desc_area; + dma_addr_t tx_desc_dma; + int tx_desc_area_size; + struct sk_buff **tx_skb; + + struct work_struct tx_timeout_task; + + struct net_device *dev; + struct napi_struct napi; + u8 work_todo; + int skb_size; + + struct net_device_stats stats; + /* Size of Tx Ring per queue */ + int tx_ring_size; + /* Number of tx descriptors in use */ + int tx_desc_count; + /* Size of Rx Ring per queue */ + int rx_ring_size; + /* Number of rx descriptors in use */ + int rx_desc_count; + + /* + * Used in case RX Ring is empty, which can occur when + * system does not have resources (skb's) + */ + struct timer_list timeout; + struct mii_bus *smi_bus; + struct phy_device *phy; + + /* clock */ + struct clk *clk; + struct pxa168_eth_platform_data *pd; + /* + * Ethernet controller base address. 
+ */ + void __iomem *base; + + /* Pointer to the hardware address filter table */ + void *htpr; + dma_addr_t htpr_dma; +}; + +struct addr_table_entry { + __le32 lo; + __le32 hi; +}; + +/* Bit fields of a Hash Table Entry */ +enum hash_table_entry { + HASH_ENTRY_VALID = 1, + SKIP = 2, + HASH_ENTRY_RECEIVE_DISCARD = 4, + HASH_ENTRY_RECEIVE_DISCARD_BIT = 2 +}; + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_init_hw(struct pxa168_eth_private *pep); +static void eth_port_reset(struct net_device *dev); +static void eth_port_start(struct net_device *dev); +static int pxa168_eth_open(struct net_device *dev); +static int pxa168_eth_stop(struct net_device *dev); +static int ethernet_phy_setup(struct net_device *dev); + +static inline u32 rdl(struct pxa168_eth_private *pep, int offset) +{ + return readl(pep->base + offset); +} + +static inline void wrl(struct pxa168_eth_private *pep, int offset, u32 data) +{ + writel(data, pep->base + offset); +} + +static void abort_dma(struct pxa168_eth_private *pep) +{ + int delay; + int max_retries = 40; + + do { + wrl(pep, SDMA_CMD, SDMA_CMD_AR | SDMA_CMD_AT); + udelay(100); + + delay = 10; + while ((rdl(pep, SDMA_CMD) & (SDMA_CMD_AR | SDMA_CMD_AT)) + && delay-- > 0) { + udelay(10); + } + } while (max_retries-- > 0 && delay <= 0); + + if (max_retries <= 0) + printk(KERN_ERR "%s : DMA Stuck\n", __func__); +} + +static int ethernet_phy_get(struct pxa168_eth_private *pep) +{ + unsigned int reg_data; + + reg_data = rdl(pep, PHY_ADDRESS); + + return (reg_data >> (5 * pep->port_num)) & 0x1f; +} + +static void ethernet_phy_set_addr(struct pxa168_eth_private *pep, int phy_addr) +{ + u32 reg_data; + int addr_shift = 5 * pep->port_num; + + reg_data = rdl(pep, PHY_ADDRESS); + reg_data &= ~(0x1f << addr_shift); + reg_data |= (phy_addr & 0x1f) << addr_shift; + wrl(pep, PHY_ADDRESS, reg_data); +} + +static void ethernet_phy_reset(struct pxa168_eth_private *pep) +{ + int data; + + data = phy_read(pep->phy, MII_BMCR); + if (data < 0) + return; + + data |= BMCR_RESET; + if (phy_write(pep->phy, MII_BMCR, data) < 0) + return; + + do { + data = phy_read(pep->phy, MII_BMCR); + } while (data >= 0 && data & BMCR_RESET); +} + +static void rxq_refill(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct sk_buff *skb; + struct rx_desc *p_used_rx_desc; + int used_rx_desc; + + while (pep->rx_desc_count < pep->rx_ring_size) { + int size; + + skb = dev_alloc_skb(pep->skb_size); + if (!skb) + break; + if (SKB_DMA_REALIGN) + skb_reserve(skb, SKB_DMA_REALIGN); + pep->rx_desc_count++; + /* Get 'used' Rx descriptor */ + used_rx_desc = pep->rx_used_desc_q; + p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc]; + size = skb->end - skb->data; + p_used_rx_desc->buf_ptr = dma_map_single(NULL, + skb->data, + size, + DMA_FROM_DEVICE); + p_used_rx_desc->buf_size = size; + pep->rx_skb[used_rx_desc] = skb; + + /* Return the descriptor to DMA ownership */ + wmb(); + p_used_rx_desc->cmd_sts = BUF_OWNED_BY_DMA | RX_EN_INT; + wmb(); + + /* Move the used descriptor pointer to the next descriptor */ + pep->rx_used_desc_q = (used_rx_desc + 1) % pep->rx_ring_size; + + /* Any Rx return cancels the Rx resource error status */ + pep->rx_resource_err = 0; + + skb_reserve(skb, ETH_HW_IP_ALIGN); + } + + /* + * If RX ring is empty of SKB, set a timer to try allocating + * again at a later time. 
+ */ + if (pep->rx_desc_count == 0) { + pep->timeout.expires = jiffies + (HZ / 10); + add_timer(&pep->timeout); + } +} + +static inline void rxq_refill_timer_wrapper(unsigned long data) +{ + struct pxa168_eth_private *pep = (void *)data; + napi_schedule(&pep->napi); +} + +static inline u8 flip_8_bits(u8 x) +{ + return (((x) & 0x01) << 3) | (((x) & 0x02) << 1) + | (((x) & 0x04) >> 1) | (((x) & 0x08) >> 3) + | (((x) & 0x10) << 3) | (((x) & 0x20) << 1) + | (((x) & 0x40) >> 1) | (((x) & 0x80) >> 3); +} + +static void nibble_swap_every_byte(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) { + mac_addr[i] = ((mac_addr[i] & 0x0f) << 4) | + ((mac_addr[i] & 0xf0) >> 4); + } +} + +static void inverse_every_nibble(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = flip_8_bits(mac_addr[i]); +} + +/* + * ---------------------------------------------------------------------------- + * This function will calculate the hash function of the address. + * Inputs + * mac_addr_orig - MAC address. + * Outputs + * return the calculated entry. + */ +static u32 hash_function(unsigned char *mac_addr_orig) +{ + u32 hash_result; + u32 addr0; + u32 addr1; + u32 addr2; + u32 addr3; + unsigned char mac_addr[ETH_ALEN]; + + /* Make a copy of MAC address since we are going to performe bit + * operations on it + */ + memcpy(mac_addr, mac_addr_orig, ETH_ALEN); + + nibble_swap_every_byte(mac_addr); + inverse_every_nibble(mac_addr); + + addr0 = (mac_addr[5] >> 2) & 0x3f; + addr1 = (mac_addr[5] & 0x03) | (((mac_addr[4] & 0x7f)) << 2); + addr2 = ((mac_addr[4] & 0x80) >> 7) | mac_addr[3] << 1; + addr3 = (mac_addr[2] & 0xff) | ((mac_addr[1] & 1) << 8); + + hash_result = (addr0 << 9) | (addr1 ^ addr2 ^ addr3); + hash_result = hash_result & 0x07ff; + return hash_result; +} + +/* + * ---------------------------------------------------------------------------- + * This function will add/del an entry to the address table. + * Inputs + * pep - ETHERNET . + * mac_addr - MAC address. + * skip - if 1, skip this address.Used in case of deleting an entry which is a + * part of chain in the hash table.We cant just delete the entry since + * that will break the chain.We need to defragment the tables time to + * time. + * rd - 0 Discard packet upon match. + * - 1 Receive packet upon match. + * Outputs + * address table entry is added/deleted. + * 0 if success. 
+ * -ENOSPC if table full + */ +static int add_del_hash_entry(struct pxa168_eth_private *pep, + unsigned char *mac_addr, + u32 rd, u32 skip, int del) +{ + struct addr_table_entry *entry, *start; + u32 new_high; + u32 new_low; + u32 i; + + new_low = (((mac_addr[1] >> 4) & 0xf) << 15) + | (((mac_addr[1] >> 0) & 0xf) << 11) + | (((mac_addr[0] >> 4) & 0xf) << 7) + | (((mac_addr[0] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 4) & 0x1) << 31) + | (((mac_addr[3] >> 0) & 0xf) << 27) + | (((mac_addr[2] >> 4) & 0xf) << 23) + | (((mac_addr[2] >> 0) & 0xf) << 19) + | (skip << SKIP) | (rd << HASH_ENTRY_RECEIVE_DISCARD_BIT) + | HASH_ENTRY_VALID; + + new_high = (((mac_addr[5] >> 4) & 0xf) << 15) + | (((mac_addr[5] >> 0) & 0xf) << 11) + | (((mac_addr[4] >> 4) & 0xf) << 7) + | (((mac_addr[4] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 5) & 0x7) << 0); + + /* + * Pick the appropriate table, start scanning for free/reusable + * entries at the index obtained by hashing the specified MAC address + */ + start = (struct addr_table_entry *)(pep->htpr); + entry = start + hash_function(mac_addr); + for (i = 0; i < HOP_NUMBER; i++) { + if (!(le32_to_cpu(entry->lo) & HASH_ENTRY_VALID)) { + break; + } else { + /* if same address put in same position */ + if (((le32_to_cpu(entry->lo) & 0xfffffff8) == + (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) == new_high)) { + break; + } + } + if (entry == start + 0x7ff) + entry = start; + else + entry++; + } + + if (((le32_to_cpu(entry->lo) & 0xfffffff8) != (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) != new_high) && del) + return 0; + + if (i == HOP_NUMBER) { + if (!del) { + printk(KERN_INFO "%s: table section is full, need to " + "move to 16kB implementation?\n", + __FILE__); + return -ENOSPC; + } else + return 0; + } + + /* + * Update the selected entry + */ + if (del) { + entry->hi = 0; + entry->lo = 0; + } else { + entry->hi = cpu_to_le32(new_high); + entry->lo = cpu_to_le32(new_low); + } + + return 0; +} + +/* + * ---------------------------------------------------------------------------- + * Create an addressTable entry from MAC address info + * found in the specifed net_device struct + * + * Input : pointer to ethernet interface network device structure + * Output : N/A + */ +static void update_hash_table_mac_address(struct pxa168_eth_private *pep, + unsigned char *oaddr, + unsigned char *addr) +{ + /* Delete old entry */ + if (oaddr) + add_del_hash_entry(pep, oaddr, 1, 0, HASH_DELETE); + /* Add new entry */ + add_del_hash_entry(pep, addr, 1, 0, HASH_ADD); +} + +static int init_hash_table(struct pxa168_eth_private *pep) +{ + /* + * Hardware expects CPU to build a hash table based on a predefined + * hash function and populate it based on hardware address. The + * location of the hash table is identified by 32-bit pointer stored + * in HTPR internal register. Two possible sizes exists for the hash + * table 8kB (256kB of DRAM required (4 x 64 kB banks)) and 1/2kB + * (16kB of DRAM required (4 x 4 kB banks)).We currently only support + * 1/2kB. + */ + /* TODO: Add support for 8kB hash table and alternative hash + * function.Driver can dynamically switch to them if the 1/2kB hash + * table is full. 
+ */ + if (pep->htpr == NULL) { + pep->htpr = dma_alloc_coherent(pep->dev->dev.parent, + HASH_ADDR_TABLE_SIZE, + &pep->htpr_dma, GFP_KERNEL); + if (pep->htpr == NULL) + return -ENOMEM; + } + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + wrl(pep, HTPR, pep->htpr_dma); + return 0; +} + +static void pxa168_eth_set_rx_mode(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct netdev_hw_addr *ha; + u32 val; + + val = rdl(pep, PORT_CONFIG); + if (dev->flags & IFF_PROMISC) + val |= PCR_PM; + else + val &= ~PCR_PM; + wrl(pep, PORT_CONFIG, val); + + /* + * Remove the old list of MAC address and add dev->addr + * and multicast address. + */ + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + netdev_for_each_mc_addr(ha, dev) + update_hash_table_mac_address(pep, NULL, ha->addr); +} + +static int pxa168_eth_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sa = addr; + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned char oldMac[ETH_ALEN]; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EINVAL; + memcpy(oldMac, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + netif_addr_lock_bh(dev); + update_hash_table_mac_address(pep, oldMac, dev->dev_addr); + netif_addr_unlock_bh(dev); + return 0; +} + +static void eth_port_start(struct net_device *dev) +{ + unsigned int val = 0; + struct pxa168_eth_private *pep = netdev_priv(dev); + int tx_curr_desc, rx_curr_desc; + + /* Perform PHY reset, if there is a PHY. */ + if (pep->phy != NULL) { + struct ethtool_cmd cmd; + + pxa168_get_settings(pep->dev, &cmd); + ethernet_phy_reset(pep); + pxa168_set_settings(pep->dev, &cmd); + } + + /* Assignment of Tx CTRP of given queue */ + tx_curr_desc = pep->tx_curr_desc_q; + wrl(pep, ETH_C_TX_DESC_1, + (u32) ((struct tx_desc *)pep->tx_desc_dma + tx_curr_desc)); + + /* Assignment of Rx CRDP of given queue */ + rx_curr_desc = pep->rx_curr_desc_q; + wrl(pep, ETH_C_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + wrl(pep, ETH_F_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Enable all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, ALL_INTS); + + val = rdl(pep, PORT_CONFIG); + val |= PCR_EN; + wrl(pep, PORT_CONFIG, val); + + /* Start RX DMA engine */ + val = rdl(pep, SDMA_CMD); + val |= SDMA_CMD_ERD; + wrl(pep, SDMA_CMD, val); +} + +static void eth_port_reset(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned int val = 0; + + /* Stop all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, 0); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Stop RX DMA */ + val = rdl(pep, SDMA_CMD); + val &= ~SDMA_CMD_ERD; /* abort dma command */ + + /* Abort any transmit and receive operations and put DMA + * in idle state. 
+ */ + abort_dma(pep); + + /* Disable port */ + val = rdl(pep, PORT_CONFIG); + val &= ~PCR_EN; + wrl(pep, PORT_CONFIG, val); +} + +/* + * txq_reclaim - Free the tx desc data for completed descriptors + * If force is non-zero, frees uncompleted descriptors as well + */ +static int txq_reclaim(struct net_device *dev, int force) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *desc; + u32 cmd_sts; + struct sk_buff *skb; + int tx_index; + dma_addr_t addr; + int count; + int released = 0; + + netif_tx_lock(dev); + + pep->work_todo &= ~WORK_TX_DONE; + while (pep->tx_desc_count > 0) { + tx_index = pep->tx_used_desc_q; + desc = &pep->p_tx_desc_area[tx_index]; + cmd_sts = desc->cmd_sts; + if (!force && (cmd_sts & BUF_OWNED_BY_DMA)) { + if (released > 0) { + goto txq_reclaim_end; + } else { + released = -1; + goto txq_reclaim_end; + } + } + pep->tx_used_desc_q = (tx_index + 1) % pep->tx_ring_size; + pep->tx_desc_count--; + addr = desc->buf_ptr; + count = desc->byte_cnt; + skb = pep->tx_skb[tx_index]; + if (skb) + pep->tx_skb[tx_index] = NULL; + + if (cmd_sts & TX_ERROR) { + if (net_ratelimit()) + printk(KERN_ERR "%s: Error in TX\n", dev->name); + dev->stats.tx_errors++; + } + dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); + if (skb) + dev_kfree_skb_irq(skb); + released++; + } +txq_reclaim_end: + netif_tx_unlock(dev); + return released; +} + +static void pxa168_eth_tx_timeout(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + printk(KERN_INFO "%s: TX timeout desc_count %d\n", + dev->name, pep->tx_desc_count); + + schedule_work(&pep->tx_timeout_task); +} + +static void pxa168_eth_tx_timeout_task(struct work_struct *work) +{ + struct pxa168_eth_private *pep = container_of(work, + struct pxa168_eth_private, + tx_timeout_task); + struct net_device *dev = pep->dev; + pxa168_eth_stop(dev); + pxa168_eth_open(dev); +} + +static int rxq_process(struct net_device *dev, int budget) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + unsigned int received_packets = 0; + struct sk_buff *skb; + + while (budget-- > 0) { + int rx_next_curr_desc, rx_curr_desc, rx_used_desc; + struct rx_desc *rx_desc; + unsigned int cmd_sts; + + /* Do not process Rx ring in case of Rx ring resource error */ + if (pep->rx_resource_err) + break; + rx_curr_desc = pep->rx_curr_desc_q; + rx_used_desc = pep->rx_used_desc_q; + rx_desc = &pep->p_rx_desc_area[rx_curr_desc]; + cmd_sts = rx_desc->cmd_sts; + rmb(); + if (cmd_sts & (BUF_OWNED_BY_DMA)) + break; + skb = pep->rx_skb[rx_curr_desc]; + pep->rx_skb[rx_curr_desc] = NULL; + + rx_next_curr_desc = (rx_curr_desc + 1) % pep->rx_ring_size; + pep->rx_curr_desc_q = rx_next_curr_desc; + + /* Rx descriptors exhausted. */ + /* Set the Rx ring resource error flag */ + if (rx_next_curr_desc == rx_used_desc) + pep->rx_resource_err = 1; + pep->rx_desc_count--; + dma_unmap_single(NULL, rx_desc->buf_ptr, + rx_desc->buf_size, + DMA_FROM_DEVICE); + received_packets++; + /* + * Update statistics. + * Note byte count includes 4 byte CRC count + */ + stats->rx_packets++; + stats->rx_bytes += rx_desc->byte_cnt; + /* + * In case received a packet without first / last bits on OR + * the error summary bit is on, the packets needs to be droped. 
+ */ + if (((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) + || (cmd_sts & RX_ERROR)) { + + stats->rx_dropped++; + if ((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) { + if (net_ratelimit()) + printk(KERN_ERR + "%s: Rx pkt on multiple desc\n", + dev->name); + } + if (cmd_sts & RX_ERROR) + stats->rx_errors++; + dev_kfree_skb_irq(skb); + } else { + /* + * The -4 is for the CRC in the trailer of the + * received packet + */ + skb_put(skb, rx_desc->byte_cnt - 4); + skb->protocol = eth_type_trans(skb, dev); + netif_receive_skb(skb); + } + dev->last_rx = jiffies; + } + /* Fill RX ring with skb's */ + rxq_refill(dev); + return received_packets; +} + +static int pxa168_eth_collect_events(struct pxa168_eth_private *pep, + struct net_device *dev) +{ + u32 icr; + int ret = 0; + + icr = rdl(pep, INT_CAUSE); + if (icr == 0) + return IRQ_NONE; + + wrl(pep, INT_CAUSE, ~icr); + if (icr & (ICR_TXBUF_H | ICR_TXBUF_L)) { + pep->work_todo |= WORK_TX_DONE; + ret = 1; + } + if (icr & ICR_RXBUF) + ret = 1; + if (icr & ICR_MII_CH) { + pep->work_todo |= WORK_LINK; + ret = 1; + } + return ret; +} + +static void handle_link_event(struct pxa168_eth_private *pep) +{ + struct net_device *dev = pep->dev; + u32 port_status; + int speed; + int duplex; + int fc; + + port_status = rdl(pep, PORT_STATUS); + if (!(port_status & LINK_UP)) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + txq_reclaim(dev, 1); + } + return; + } + if (port_status & PORT_SPEED_100) + speed = 100; + else + speed = 10; + + duplex = (port_status & FULL_DUPLEX) ? 1 : 0; + fc = (port_status & FLOW_CONTROL_ENABLED) ? 1 : 0; + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", fc ? "en" : "dis"); + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); +} + +static irqreturn_t pxa168_eth_int_handler(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (unlikely(!pxa168_eth_collect_events(pep, dev))) + return IRQ_NONE; + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + napi_schedule(&pep->napi); + return IRQ_HANDLED; +} + +static void pxa168_eth_recalc_skb_size(struct pxa168_eth_private *pep) +{ + int skb_size; + + /* + * Reserve 2+14 bytes for an ethernet header (the hardware + * automatically prepends 2 bytes of dummy data to each + * received packet), 16 bytes for up to four VLAN tags, and + * 4 bytes for the trailing FCS -- 36 bytes total. + */ + skb_size = pep->dev->mtu + 36; + + /* + * Make sure that the skb size is a multiple of 8 bytes, as + * the lower three bits of the receive descriptor's buffer + * size field are ignored by the hardware. + */ + pep->skb_size = (skb_size + 7) & ~7; + + /* + * If NET_SKB_PAD is smaller than a cache line, + * netdev_alloc_skb() will cause skb->data to be misaligned + * to a cache line boundary. If this is the case, include + * some extra space to allow re-aligning the data area. 
+ */ + pep->skb_size += SKB_DMA_REALIGN; + +} + +static int set_port_config_ext(struct pxa168_eth_private *pep) +{ + int skb_size; + + pxa168_eth_recalc_skb_size(pep); + if (pep->skb_size <= 1518) + skb_size = PCXR_MFL_1518; + else if (pep->skb_size <= 1536) + skb_size = PCXR_MFL_1536; + else if (pep->skb_size <= 2048) + skb_size = PCXR_MFL_2048; + else + skb_size = PCXR_MFL_64K; + + /* Extended Port Configuration */ + wrl(pep, + PORT_CONFIG_EXT, PCXR_2BSM | /* Two byte prefix aligns IP hdr */ + PCXR_DSCP_EN | /* Enable DSCP in IP */ + skb_size | PCXR_FLP | /* do not force link pass */ + PCXR_TX_HIGH_PRI); /* Transmit - high priority queue */ + + return 0; +} + +static int pxa168_init_hw(struct pxa168_eth_private *pep) +{ + int err = 0; + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + /* Abort any transmit and receive operations and put DMA + * in idle state. + */ + abort_dma(pep); + /* Initialize address hash table */ + err = init_hash_table(pep); + if (err) + return err; + /* SDMA configuration */ + wrl(pep, SDMA_CONFIG, SDCR_BSZ8 | /* Burst size = 32 bytes */ + SDCR_RIFB | /* Rx interrupt on frame */ + SDCR_BLMT | /* Little endian transmit */ + SDCR_BLMR | /* Little endian receive */ + SDCR_RC_MAX_RETRANS); /* Max retransmit count */ + /* Port Configuration */ + wrl(pep, PORT_CONFIG, PCR_HS); /* Hash size is 1/2kb */ + set_port_config_ext(pep); + + return err; +} + +static int rxq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct rx_desc *p_rx_desc; + int size = 0, i = 0; + int rx_desc_num = pep->rx_ring_size; + + /* Allocate RX skb rings */ + pep->rx_skb = kmalloc(sizeof(*pep->rx_skb) * pep->rx_ring_size, + GFP_KERNEL); + if (!pep->rx_skb) { + printk(KERN_ERR "%s: Cannot alloc RX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate RX ring */ + pep->rx_desc_count = 0; + size = pep->rx_ring_size * sizeof(struct rx_desc); + pep->rx_desc_area_size = size; + pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->rx_desc_dma, GFP_KERNEL); + if (!pep->p_rx_desc_area) { + printk(KERN_ERR "%s: Cannot alloc RX ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_rx_desc_area, 0, size); + /* initialize the next_desc_ptr links in the Rx descriptors ring */ + p_rx_desc = (struct rx_desc *)pep->p_rx_desc_area; + for (i = 0; i < rx_desc_num; i++) { + p_rx_desc[i].next_desc_ptr = pep->rx_desc_dma + + ((i + 1) % rx_desc_num) * sizeof(struct rx_desc); + } + /* Save Rx desc pointer to driver struct. */ + pep->rx_curr_desc_q = 0; + pep->rx_used_desc_q = 0; + pep->rx_desc_area_size = rx_desc_num * sizeof(struct rx_desc); + return 0; +out: + kfree(pep->rx_skb); + return -ENOMEM; +} + +static void rxq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int curr; + + /* Free preallocated skb's on RX rings */ + for (curr = 0; pep->rx_desc_count && curr < pep->rx_ring_size; curr++) { + if (pep->rx_skb[curr]) { + dev_kfree_skb(pep->rx_skb[curr]); + pep->rx_desc_count--; + } + } + if (pep->rx_desc_count) + printk(KERN_ERR + "Error in freeing Rx Ring. 
%d skb's still\n", + pep->rx_desc_count); + /* Free RX ring */ + if (pep->p_rx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->rx_desc_area_size, + pep->p_rx_desc_area, pep->rx_desc_dma); + kfree(pep->rx_skb); +} + +static int txq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *p_tx_desc; + int size = 0, i = 0; + int tx_desc_num = pep->tx_ring_size; + + pep->tx_skb = kmalloc(sizeof(*pep->tx_skb) * pep->tx_ring_size, + GFP_KERNEL); + if (!pep->tx_skb) { + printk(KERN_ERR "%s: Cannot alloc TX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate TX ring */ + pep->tx_desc_count = 0; + size = pep->tx_ring_size * sizeof(struct tx_desc); + pep->tx_desc_area_size = size; + pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->tx_desc_dma, GFP_KERNEL); + if (!pep->p_tx_desc_area) { + printk(KERN_ERR "%s: Cannot allocate Tx Ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_tx_desc_area, 0, pep->tx_desc_area_size); + /* Initialize the next_desc_ptr links in the Tx descriptors ring */ + p_tx_desc = (struct tx_desc *)pep->p_tx_desc_area; + for (i = 0; i < tx_desc_num; i++) { + p_tx_desc[i].next_desc_ptr = pep->tx_desc_dma + + ((i + 1) % tx_desc_num) * sizeof(struct tx_desc); + } + pep->tx_curr_desc_q = 0; + pep->tx_used_desc_q = 0; + pep->tx_desc_area_size = tx_desc_num * sizeof(struct tx_desc); + return 0; +out: + kfree(pep->tx_skb); + return -ENOMEM; +} + +static void txq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + /* Free outstanding skb's on TX ring */ + txq_reclaim(dev, 1); + BUG_ON(pep->tx_used_desc_q != pep->tx_curr_desc_q); + /* Free TX ring */ + if (pep->p_tx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->tx_desc_area_size, + pep->p_tx_desc_area, pep->tx_desc_dma); + kfree(pep->tx_skb); +} + +static int pxa168_eth_open(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int err; + + err = request_irq(dev->irq, pxa168_eth_int_handler, + IRQF_DISABLED, dev->name, dev); + if (err) { + dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n"); + return -EAGAIN; + } + pep->rx_resource_err = 0; + err = rxq_init(dev); + if (err != 0) + goto out_free_irq; + err = txq_init(dev); + if (err != 0) + goto out_free_rx_skb; + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + + /* Fill RX ring with skb's */ + rxq_refill(dev); + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + netif_carrier_off(dev); + eth_port_start(dev); + napi_enable(&pep->napi); + return 0; +out_free_rx_skb: + rxq_deinit(dev); +out_free_irq: + free_irq(dev->irq, dev); + return err; +} + +static int pxa168_eth_stop(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + eth_port_reset(dev); + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + napi_disable(&pep->napi); + del_timer_sync(&pep->timeout); + netif_carrier_off(dev); + free_irq(dev->irq, dev); + rxq_deinit(dev); + txq_deinit(dev); + + return 0; +} + +static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) +{ + int retval; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if ((mtu > 9500) || (mtu < 68)) + return -EINVAL; + + dev->mtu = mtu; + retval = set_port_config_ext(pep); + + if (!netif_running(dev)) + return 0; + + /* + * Stop and then re-open the interface. This will allocate RX + * skbs of the new MTU. 
+ * There is a possible danger that the open will not succeed, + * due to memory being full. + */ + pxa168_eth_stop(dev); + if (pxa168_eth_open(dev)) { + dev_printk(KERN_ERR, &dev->dev, + "fatal error on re-opening device after " + "MTU change\n"); + } + + return 0; +} + +static int eth_alloc_tx_desc_index(struct pxa168_eth_private *pep) +{ + int tx_desc_curr; + + tx_desc_curr = pep->tx_curr_desc_q; + pep->tx_curr_desc_q = (tx_desc_curr + 1) % pep->tx_ring_size; + BUG_ON(pep->tx_curr_desc_q == pep->tx_used_desc_q); + pep->tx_desc_count++; + + return tx_desc_curr; +} + +static int pxa168_rx_poll(struct napi_struct *napi, int budget) +{ + struct pxa168_eth_private *pep = + container_of(napi, struct pxa168_eth_private, napi); + struct net_device *dev = pep->dev; + int work_done = 0; + + if (unlikely(pep->work_todo & WORK_LINK)) { + pep->work_todo &= ~(WORK_LINK); + handle_link_event(pep); + } + /* + * We call txq_reclaim every time since in NAPI interupts are disabled + * and due to this we miss the TX_DONE interrupt,which is not updated in + * interrupt status register. + */ + txq_reclaim(dev, 0); + if (netif_queue_stopped(dev) + && pep->tx_ring_size - pep->tx_desc_count > 1) { + netif_wake_queue(dev); + } + work_done = rxq_process(dev, budget); + if (work_done < budget) { + napi_complete(napi); + wrl(pep, INT_MASK, ALL_INTS); + } + + return work_done; +} + +static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct tx_desc *desc; + int tx_index; + int length; + + tx_index = eth_alloc_tx_desc_index(pep); + desc = &pep->p_tx_desc_area[tx_index]; + length = skb->len; + pep->tx_skb[tx_index] = skb; + desc->byte_cnt = length; + desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + wmb(); + desc->cmd_sts = BUF_OWNED_BY_DMA | TX_GEN_CRC | TX_FIRST_DESC | + TX_ZERO_PADDING | TX_LAST_DESC | TX_EN_INT; + wmb(); + wrl(pep, SDMA_CMD, SDMA_CMD_TXDH | SDMA_CMD_ERD); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + dev->trans_start = jiffies; + if (pep->tx_ring_size - pep->tx_desc_count <= 1) { + /* We handled the current skb, but now we are out of space.*/ + netif_stop_queue(dev); + } + + return NETDEV_TX_OK; +} + +static int smi_wait_ready(struct pxa168_eth_private *pep) +{ + int i = 0; + + /* wait for the SMI register to become available */ + for (i = 0; rdl(pep, SMI) & SMI_BUSY; i++) { + if (i == PHY_WAIT_ITERATIONS) + return -ETIMEDOUT; + msleep(10); + } + + return 0; +} + +static int pxa168_smi_read(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct pxa168_eth_private *pep = bus->priv; + int i = 0; + int val; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | SMI_OP_R); + /* now wait for the data to be valid */ + for (i = 0; !((val = rdl(pep, SMI)) & SMI_R_VALID); i++) { + if (i == PHY_WAIT_ITERATIONS) { + printk(KERN_WARNING + "pxa168_eth: SMI bus read not valid\n"); + return -ENODEV; + } + msleep(10); + } + + return val & 0xffff; +} + +static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum, + u16 value) +{ + struct pxa168_eth_private *pep = bus->priv; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | + SMI_OP_W | (value & 0xffff)); + + if (smi_wait_ready(pep)) { + 
printk(KERN_ERR "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + if (pep->phy != NULL) + return phy_mii_ioctl(pep->phy, if_mii(ifr), cmd); + + return -EOPNOTSUPP; +} + +static struct phy_device *phy_scan(struct pxa168_eth_private *pep, int phy_addr) +{ + struct mii_bus *bus = pep->smi_bus; + struct phy_device *phydev; + int start; + int num; + int i; + + if (phy_addr == PXA168_ETH_PHY_ADDR_DEFAULT) { + /* Scan entire range */ + start = ethernet_phy_get(pep); + num = 32; + } else { + /* Use phy addr specific to platform */ + start = phy_addr & 0x1f; + num = 1; + } + phydev = NULL; + for (i = 0; i < num; i++) { + int addr = (start + i) & 0x1f; + if (bus->phy_map[addr] == NULL) + mdiobus_scan(bus, addr); + + if (phydev == NULL) { + phydev = bus->phy_map[addr]; + if (phydev != NULL) + ethernet_phy_set_addr(pep, addr); + } + } + + return phydev; +} + +static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex) +{ + struct phy_device *phy = pep->phy; + ethernet_phy_reset(pep); + + phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII); + + if (speed == 0) { + phy->autoneg = AUTONEG_ENABLE; + phy->speed = 0; + phy->duplex = 0; + phy->supported &= PHY_BASIC_FEATURES; + phy->advertising = phy->supported | ADVERTISED_Autoneg; + } else { + phy->autoneg = AUTONEG_DISABLE; + phy->advertising = 0; + phy->speed = speed; + phy->duplex = duplex; + } + phy_start_aneg(phy); +} + +static int ethernet_phy_setup(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->pd != NULL) { + if (pep->pd->init) + pep->pd->init(); + } + pep->phy = phy_scan(pep, pep->pd->phy_addr & 0x1f); + if (pep->phy != NULL) + phy_init(pep, pep->pd->speed, pep->pd->duplex); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + return 0; +} + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int err; + + err = phy_read_status(pep->phy); + if (err == 0) + err = phy_ethtool_gset(pep->phy, cmd); + + return err; +} + +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + return phy_ethtool_sset(pep->phy, cmd); +} + +static void pxa168_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strncpy(info->driver, DRIVER_NAME, 32); + strncpy(info->version, DRIVER_VERSION, 32); + strncpy(info->fw_version, "N/A", 32); + strncpy(info->bus_info, "N/A", 32); +} + +static u32 pxa168_get_link(struct net_device *dev) +{ + return !!netif_carrier_ok(dev); +} + +static const struct ethtool_ops pxa168_ethtool_ops = { + .get_settings = pxa168_get_settings, + .set_settings = pxa168_set_settings, + .get_drvinfo = pxa168_get_drvinfo, + .get_link = pxa168_get_link, +}; + +static const struct net_device_ops pxa168_eth_netdev_ops = { + .ndo_open = pxa168_eth_open, + .ndo_stop = pxa168_eth_stop, + .ndo_start_xmit = pxa168_eth_start_xmit, + .ndo_set_rx_mode = pxa168_eth_set_rx_mode, + .ndo_set_mac_address = pxa168_eth_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = pxa168_eth_do_ioctl, + .ndo_change_mtu = pxa168_eth_change_mtu, + .ndo_tx_timeout = pxa168_eth_tx_timeout, +}; + +static int pxa168_eth_probe(struct platform_device *pdev) +{ + struct pxa168_eth_private *pep = NULL; + struct net_device 
*dev = NULL; + struct resource *res; + struct clk *clk; + int err; + + printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); + + clk = clk_get(&pdev->dev, "MFUCLK"); + if (IS_ERR(clk)) { + printk(KERN_ERR "%s: Fast Ethernet failed to get clock\n", + DRIVER_NAME); + return -ENODEV; + } + clk_enable(clk); + + dev = alloc_etherdev(sizeof(struct pxa168_eth_private)); + if (!dev) { + err = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dev); + pep = netdev_priv(dev); + pep->dev = dev; + pep->clk = clk; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + err = -ENODEV; + goto out; + } + pep->base = ioremap(res->start, res->end - res->start + 1); + if (pep->base == NULL) { + err = -ENOMEM; + goto out; + } + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + BUG_ON(!res); + dev->irq = res->start; + dev->netdev_ops = &pxa168_eth_netdev_ops; + dev->watchdog_timeo = 2 * HZ; + dev->base_addr = 0; + SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops); + + INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); + + printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME); + random_ether_addr(dev->dev_addr); + + pep->pd = pdev->dev.platform_data; + pep->rx_ring_size = NUM_RX_DESCS; + if (pep->pd->rx_queue_size) + pep->rx_ring_size = pep->pd->rx_queue_size; + + pep->tx_ring_size = NUM_TX_DESCS; + if (pep->pd->tx_queue_size) + pep->tx_ring_size = pep->pd->tx_queue_size; + + pep->port_num = pep->pd->port_number; + /* Hardware supports only 3 ports */ + BUG_ON(pep->port_num > 2); + netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); + + memset(&pep->timeout, 0, sizeof(struct timer_list)); + init_timer(&pep->timeout); + pep->timeout.function = rxq_refill_timer_wrapper; + pep->timeout.data = (unsigned long)pep; + + pep->smi_bus = mdiobus_alloc(); + if (pep->smi_bus == NULL) { + err = -ENOMEM; + goto out; + } + pep->smi_bus->priv = pep; + pep->smi_bus->name = "pxa168_eth smi"; + pep->smi_bus->read = pxa168_smi_read; + pep->smi_bus->write = pxa168_smi_write; + snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id); + pep->smi_bus->parent = &pdev->dev; + pep->smi_bus->phy_mask = 0xffffffff; + if (mdiobus_register(pep->smi_bus) < 0) { + err = -ENOMEM; + goto out; + } + pxa168_init_hw(pep); + err = ethernet_phy_setup(dev); + if (err) + goto out; + SET_NETDEV_DEV(dev, &pdev->dev); + err = register_netdev(dev); + if (err) + goto out; + return 0; +out: + if (pep->clk) { + clk_disable(pep->clk); + clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->base) { + iounmap(pep->base); + pep->base = NULL; + } + if (dev) + free_netdev(dev); + return err; +} + +static int pxa168_eth_remove(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->htpr) { + dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE, + pep->htpr, pep->htpr_dma); + pep->htpr = NULL; + } + if (pep->clk) { + clk_disable(pep->clk); + clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->phy != NULL) + phy_detach(pep->phy); + + iounmap(pep->base); + pep->base = NULL; + unregister_netdev(dev); + flush_scheduled_work(); + free_netdev(dev); + platform_set_drvdata(pdev, NULL); + return 0; +} + +static void pxa168_eth_shutdown(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + eth_port_reset(dev); +} + +#ifdef CONFIG_PM +static int pxa168_eth_resume(struct platform_device *pdev) +{ + return -ENOSYS; +} + +static int pxa168_eth_suspend(struct platform_device 
*pdev, pm_message_t state) +{ + return -ENOSYS; +} + +#else +#define pxa168_eth_resume NULL +#define pxa168_eth_suspend NULL +#endif + +static struct platform_driver pxa168_eth_driver = { + .probe = pxa168_eth_probe, + .remove = pxa168_eth_remove, + .shutdown = pxa168_eth_shutdown, + .resume = pxa168_eth_resume, + .suspend = pxa168_eth_suspend, + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init pxa168_init_module(void) +{ + return platform_driver_register(&pxa168_eth_driver); +} + +static void __exit pxa168_cleanup_module(void) +{ + platform_driver_unregister(&pxa168_eth_driver); +} + +module_init(pxa168_init_module); +module_exit(pxa168_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ethernet driver for Marvell PXA168"); +MODULE_ALIAS("platform:pxa168_eth"); diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h new file mode 100644 index 000000000000..18d75e795606 --- /dev/null +++ b/include/linux/pxa168_eth.h @@ -0,0 +1,30 @@ +/* + *pxa168 ethernet platform device data definition file. + */ +#ifndef __LINUX_PXA168_ETH_H +#define __LINUX_PXA168_ETH_H + +struct pxa168_eth_platform_data { + int port_number; + int phy_addr; + + /* + * If speed is 0, then speed and duplex are autonegotiated. + */ + int speed; /* 0, SPEED_10, SPEED_100 */ + int duplex; /* DUPLEX_HALF or DUPLEX_FULL */ + + /* + * Override default RX/TX queue sizes if nonzero. + */ + int rx_queue_size; + int tx_queue_size; + + /* + * init callback is used for board specific initialization + * e.g on Aspenite its used to initialize the PHY transceiver. + */ + int (*init)(void); +}; + +#endif /* __LINUX_PXA168_ETH_H */ -- cgit v1.2.3 From e243f5b6de35b6fc394bc2e1e1737afe538e7e0c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 15 Aug 2010 10:03:57 +0000 Subject: netfilter: fix userspace header warning "make headers_check" issued the following warning: CHECK include/linux/netfilter (64 files) usr/include/linux/netfilter/xt_ipvs.h:19: found __[us]{8,16,32,64} type without #include Fix this by as suggested including linux/types.h. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- include/linux/netfilter/xt_ipvs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h index 1167aeb7a347..eff34ac18808 100644 --- a/include/linux/netfilter/xt_ipvs.h +++ b/include/linux/netfilter/xt_ipvs.h @@ -1,6 +1,8 @@ #ifndef _XT_IPVS_H #define _XT_IPVS_H +#include + enum { XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */ XT_IPVS_PROTO = 1 << 1, -- cgit v1.2.3 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller
---
 Documentation/networking/timestamping.txt | 22 +++++++++-------
 drivers/net/bfin_mac.c                    | 10 +++----
 drivers/net/gianfar.c                     | 15 +++++------
 drivers/net/igb/igb.h                     |  2 +-
 drivers/net/igb/igb_main.c                | 11 ++++----
 include/linux/skbuff.h                    | 44 +++++++++++--------------------
 include/net/ip.h                          |  2 +-
 include/net/sock.h                        |  8 ++----
 net/can/raw.c                             |  4 +--
 net/core/dev.c                            |  6 ++---
 net/core/skbuff.c                         |  2 +-
 net/ipv4/icmp.c                           |  4 +--
 net/ipv4/ip_output.c                      |  6 ++---
 net/ipv4/raw.c                            |  2 +-
 net/ipv4/udp.c                            |  4 +--
 net/packet/af_packet.c                    |  4 +--
 net/socket.c                              |  9 +++----
 17 files changed, 68 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index e8c8f4f06c67..98097d8cb910 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -172,15 +172,19 @@ struct skb_shared_hwtstamps {
 };
 
 Time stamps for outgoing packets are to be generated as follows:
-- In hard_start_xmit(), check if skb_tx(skb)->hardware is set no-zero.
-  If yes, then the driver is expected to do hardware time stamping.
+- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+  is set non-zero. If yes, then the driver is expected to do hardware time
+  stamping.
 - If this is possible for the skb and requested, then declare
-  that the driver is doing the time stamping by setting the field
-  skb_tx(skb)->in_progress non-zero. You might want to keep a pointer
-  to the associated skb for the next step and not free the skb. A driver
-  not supporting hardware time stamping doesn't do that. A driver must
-  never touch sk_buff::tstamp! It is used to store software generated
-  time stamps by the network subsystem.
+  that the driver is doing the time stamping by setting the flag
+  SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags, e.g. with
+
+      skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+  You might want to keep a pointer to the associated skb for the next step
+  and not free the skb. A driver not supporting hardware time stamping doesn't
+  do that. A driver must never touch sk_buff::tstamp! It is used to store
+  software generated time stamps by the network subsystem.
 - As soon as the driver has sent the packet and/or obtained a
   hardware time stamp for it, it passes the time stamp back by
   calling skb_hwtstamp_tx() with the original skb, the raw
@@ -191,6 +195,6 @@ Time stamps for outgoing packets are to be generated as follows:
   this would occur at a later time in the processing pipeline than other
   software time stamping and therefore could lead to unexpected deltas
   between time stamps.
-- If the driver did not call set skb_tx(skb)->in_progress, then
+- If the driver did not set the SKBTX_IN_PROGRESS flag (see above), then
   dev_hard_start_xmit() checks whether software time stamping
   is wanted as fallback and potentially generates the time stamp.
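
To make the driver-side contract above concrete, here is a minimal sketch; it
is not taken from the patch itself, and my_xmit(), my_tx_complete() and the
completion hook are hypothetical names, while the SKBTX_* flags,
skb_shinfo() and skb_tstamp_tx() are the real interfaces this series uses:

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/ktime.h>

static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
		/* claim the stamp so the stack skips the SW fallback */
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		/* keep a reference to skb until the HW stamp arrives */
	}
	/* ... hand the packet to the hardware queue ... */
	return NETDEV_TX_OK;
}

/* Completion path: feed the raw hardware stamp back to the socket. */
static void my_tx_complete(struct sk_buff *skb, u64 raw_ns)
{
	struct skb_shared_hwtstamps hwtstamps;

	if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
		return;
	memset(&hwtstamps, 0, sizeof(hwtstamps));
	hwtstamps.hwtstamp = ns_to_ktime(raw_ns);
	skb_tstamp_tx(skb, &hwtstamps);	/* clones skb, queues the stamp */
}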
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 012613fde3f4..7a0e4156fade 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -803,15 +803,14 @@ static void bfin_dump_hwtamp(char *s, ktime_t *hw, ktime_t *ts, struct timecompa static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) { struct bfin_mac_local *lp = netdev_priv(netdev); - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->hardware) { + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { int timeout_cnt = MAX_TIMEOUT_CNT; /* When doing time stamping, keep the connection to the socket * a while longer */ - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* * The timestamping is done at the EMAC module's MII/RMII interface @@ -991,7 +990,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct bfin_mac_local *lp = netdev_priv(dev); u16 *data; u32 data_align = (unsigned long)(skb->data) & 0x3; - union skb_shared_tx *shtx = skb_tx(skb); current_tx_ptr->skb = skb; @@ -1005,7 +1003,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, * of this field are the length of the packet payload in bytes and the higher * 4 bits are the timestamping enable field. */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *data |= 0x1000; current_tx_ptr->desc_a.start_addr = (u32)data; @@ -1015,7 +1013,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, } else { *((u16 *)(current_tx_ptr->packet)) = (u16)(skb->len); /* enable timestamping for the sent packet */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *((u16 *)(current_tx_ptr->packet)) |= 0x1000; memcpy((u8 *)(current_tx_ptr->packet + 2), skb->data, skb->len); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 3d9f958ebd2c..e6048d6ab0ea 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2048,7 +2048,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bufaddr; unsigned long flags; unsigned int nr_frags, nr_txbds, length; - union skb_shared_tx *shtx; /* * TOE=1 frames larger than 2500 bytes may see excess delays @@ -2069,10 +2068,10 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) txq = netdev_get_tx_queue(dev, rq); base = tx_queue->tx_bd_base; regs = tx_queue->grp->regs; - shtx = skb_tx(skb); /* check if time stamp should be generated */ - if (unlikely(shtx->hardware && priv->hwts_tx_en)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + priv->hwts_tx_en)) do_tstamp = 1; /* make space for additional header when fcb is needed */ @@ -2174,7 +2173,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Setup tx hardware time stamping if requested */ if (unlikely(do_tstamp)) { - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (fcb == NULL) fcb = gfar_add_fcb(skb); fcb->ptp = 1; @@ -2446,7 +2445,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) int howmany = 0; u32 lstatus; size_t buflen; - union skb_shared_tx *shtx; rx_queue = priv->rx_queue[tx_queue->qindex]; bdp = tx_queue->dirty_tx; @@ -2461,8 +2459,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) * When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - shtx = skb_tx(skb); - if (unlikely(shtx->in_progress)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2476,7 +2473,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { next = next_txbd(bdp, base, tx_ring_size); buflen = next->length + GMAC_FCB_LEN; } else @@ -2485,7 +2482,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr, buflen, DMA_TO_DEVICE); - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h index 6e63d9a7fc75..44e0ff1494e0 100644 --- a/drivers/net/igb/igb.h +++ b/drivers/net/igb/igb.h @@ -143,7 +143,7 @@ struct igb_buffer { u16 next_to_watch; unsigned int bytecount; u16 gso_segs; - union skb_shared_tx shtx; + u8 tx_flags; u8 mapped_as_page; }; /* RX */ diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 9b4e5895f5f9..985e37cf17b6 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -3954,7 +3954,7 @@ static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb, } tx_ring->buffer_info[i].skb = skb; - tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags; + tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len; tx_ring->buffer_info[i].gso_segs = gso_segs; @@ -4088,7 +4088,6 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, u32 tx_flags = 0; u16 first; u8 hdr_len = 0; - union skb_shared_tx *shtx = skb_tx(skb); /* need: 1 descriptor per page, * + 2 desc gap to keep tail from touching head, @@ -4100,8 +4099,8 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (unlikely(shtx->hardware)) { - shtx->in_progress = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; } @@ -5319,7 +5318,7 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *bu u64 regval; /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!buffer_info->shtx.hardware) || + if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) || !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) return; @@ -5500,7 +5499,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr, * values must belong to this one here and therefore we don't need to * compare any of the additional attributes stored for it. * - * If nothing went wrong, then it should have a skb_shared_tx that we + * If nothing went wrong, then it should have a shared tx_flags that we * can turn into a skb_shared_hwtstamps. 
*/ if (staterr & E1000_RXDADV_STAT_TSIP) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8050382b189..f067c95cf18a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -163,26 +163,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. - */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +188,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -587,11 +580,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1996,8 +1984,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d681..7691aca133db 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..100e43bf95fb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. 
*/ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index a10e3338f084..7d77e67e57af 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto free_skb; /* to be able to check the received tx sock reference in raw_rcv() */ - skb_tx(skb)->prevent_sk_orphan = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index 586a11cb4398..c1dc8a95f6ff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb) /* * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested, since - * drivers need to call skb_tstamp_tx() to send the timestamp. + * We cannot orphan skb if tx timestamp is requested or the sk-reference + * is needed on driver level for other reasons, e.g. see net/can/raw.c */ static inline void skb_orphan_try(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && !skb_tx(skb)->flags) { + if (sk && !skb_shinfo(skb)->tx_flags) { /* skb_tx_hash() wont be able to get sk. * We copy sk_hash into skb->rxhash */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..99ef721f773d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3016,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, } else { /* * no hardware time stamps available, - * so keep the skb_shared_tx and only + * so keep the shared tx_flags and only * store software time stamp */ skb->tstamp = ktime_get_real(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a0d847c7cba5..96bc7f9475a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5f..e807492f1777 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -953,7 +953,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->shtx.flags = 0; + ipc->tx_flags = 0; } if (skb == NULL) goto error; @@ -964,7 +964,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - *skb_tx(skb) = ipc->shtx; + skb_shinfo(skb)->tx_flags = ipc->tx_flags; /* * Find where to start putting bytes. 
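
For reference, a minimal sketch of the converted call pattern used throughout
these net/ipv4 hunks; example_sendmsg_prep() is a hypothetical name, while
sock_tx_timestamp() with its new signature and the plain tx_flags byte are
what this patch introduces:

#include <linux/skbuff.h>
#include <net/sock.h>

static int example_sendmsg_prep(struct sock *sk, struct sk_buff *skb)
{
	__u8 tx_flags;		/* plays the role of ipc.tx_flags above */
	int err;

	err = sock_tx_timestamp(sk, &tx_flags);	/* sets SKBTX_{HW,SW}_TSTAMP */
	if (err)
		return err;
	skb_shinfo(skb)->tx_flags = tx_flags;	/* plain byte copy, no union */
	return 0;
}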
@@ -1384,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 009a7b2aa1ef..1f85ef289895 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..86e757e162ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (up->pending) { /* @@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(msg, sk, &ipc.shtx); + err = sock_tx_timestamp(sk, &ipc.tx_flags); if (err) return err; if (msg->msg_controllen) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9a17f28b1253..3616f27b9d46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -488,7 +488,7 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_unlock; @@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_free; diff --git a/net/socket.c b/net/socket.c index 2270b941bcc7..7848d12f5e4d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -535,14 +535,13 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; + *tx_flags |= SKBTX_SW_TSTAMP; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3 From ede1b4290781ae82ccf0f2ecc6dada8d3dd35779 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 18 Aug 2010 15:33:13 -0700 Subject: tracing: Fix timer tracing PowerTOP would like to be able to trace timers. Unfortunately, the current timer tracing is not very useful: the actual timer function is not recorded in the trace at the start of timer execution. Although this is recorded for timer "start" time (when it gets armed), this is not useful; most timers get started early, and a tracer like PowerTOP will never see this event, but will only see the actual running of the timer. This patch just adds the function to the timer tracing; I've verified with PowerTOP that now it can get useful information about timers. 
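
As a hedged illustration of what the enriched event buys a tracer, consider
this sketch; the demo_timer* names are hypothetical, and the trace line shown
in the comment follows the format added by the diff below:

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	/*
	 * With this patch, timer_expire_entry reports roughly:
	 *   timer_expire_entry: timer=<addr> function=demo_timer_fn now=<jiffies>
	 * so a tracer like PowerTOP can attribute the wakeup to demo_timer_fn.
	 */
}

static void demo_arm(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies + HZ);
}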
Signed-off-by: Arjan van de Ven Cc: xiaoguangrong@cn.fujitsu.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: # .35.x, .34.x, .33.x LKML-Reference: <4C6C5FA9.3000405@linux.intel.com> Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index c624126a9c8a..425bcfe56c62 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -81,14 +81,16 @@ TRACE_EVENT(timer_expire_entry, TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) + __field( void *, function) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; + __entry->function = timer->function; ), - TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) + TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) ); /** @@ -200,14 +202,16 @@ TRACE_EVENT(hrtimer_expire_entry, TP_STRUCT__entry( __field( void *, hrtimer ) __field( s64, now ) + __field( void *, function) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = now->tv64; + __entry->function = hrtimer->function; ), - TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); -- cgit v1.2.3 From e760702ed8333588f9f21e7bf6597873993006f1 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 17 Aug 2010 19:03:44 +0000 Subject: net: introduce proto_ports_offset() Introduce proto_ports_offset() for getting the position of the ports or SPI in the message of a protocol. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/in.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 41d88a4689af..beeb6dee2b49 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -250,6 +250,25 @@ struct sockaddr_in { #ifdef __KERNEL__ +#include + +static inline int proto_ports_offset(int proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: /* SPI */ + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: /* SPI */ + return 4; + default: + return -EINVAL; + } +} + static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); -- cgit v1.2.3 From d34a16661ed0fed433c9469d7cfa3ca4d30ca42e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jun 2010 17:06:21 -0700 Subject: net: convert to rcu_dereference_index_check() The task_cls_classid() function applies rcu_dereference() to integers, which does not work with the shiny new sparse-based checking in rcu_dereference(). This commit therefore moves to the new RCU API rcu_dereference_index_check(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David S. 
Miller Acked-by: Herbert Xu --- include/net/cls_cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..dd1fdb8293f5 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; -- cgit v1.2.3 From ca5ecddfa8fcbd948c95530e7e817cee9fb43a3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Apr 2010 14:39:09 -0700 Subject: rcu: define __rcu address space modifier for sparse This commit provides definitions for the __rcu annotation defined earlier. This annotation permits sparse to check for correct use of RCU-protected pointers. If a pointer that is annotated with __rcu is accessed directly (as opposed to via rcu_dereference(), rcu_assign_pointer(), or one of their variants), sparse can be made to complain. To enable such complaints, use the new default-disabled CONFIG_SPARSE_RCU_POINTER kernel configuration option. Please note that these sparse complaints are intended to be a debugging aid, -not- a code-style-enforcement mechanism. There are special rcu_dereference_protected() and rcu_access_pointer() accessors for use when RCU read-side protection is not required, for example, when no other CPU has access to the data structure in question or while the current CPU hold the update-side lock. This patch also updates a number of docbook comments that were showing their age. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Christopher Li Reviewed-by: Josh Triplett --- include/linux/compiler.h | 4 + include/linux/rcupdate.h | 352 ++++++++++++++++++++++++++++------------------- include/linux/srcu.h | 27 +++- kernel/rcupdate.c | 6 +- lib/Kconfig.debug | 13 ++ 5 files changed, 257 insertions(+), 145 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..b973dea2d6b0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -120,14 +121,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. 
This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +146,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. @@ -220,41 +224,155 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define __do_rcu_dereference_check(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. 
Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + +/** + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? 
@@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +381,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. 
+ * This is a simple wrapper around rcu_dereference_check(). */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference(p) rcu_dereference_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) + +/** + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -337,7 +481,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -349,15 +493,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +524,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_rcu_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. 
*/ static inline void rcu_read_lock_sched(void) { @@ -420,54 +564,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +580,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -495,7 +602,7 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -509,7 +616,7 @@ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. 
* @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -566,37 +673,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..6f456a720ff0 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,12 +108,31 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. 
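
A brief usage sketch for the checked accessors above; the demo_* names and
the mutex are illustrative only, while srcu_dereference(),
srcu_dereference_check(), rcu_assign_pointer() and the __rcu annotation are
the interfaces defined in this series:

#include <linux/srcu.h>
#include <linux/mutex.h>

struct demo_cfg { int val; };

static struct srcu_struct demo_srcu;	/* init_srcu_struct() at boot */
static struct demo_cfg __rcu *demo_cfg;
static DEFINE_MUTEX(demo_lock);

static int demo_read(void)
{
	struct demo_cfg *p;
	int idx, val;

	idx = srcu_read_lock(&demo_srcu);
	p = srcu_dereference(demo_cfg, &demo_srcu);	/* checked fetch */
	val = p ? p->val : -1;
	srcu_read_unlock(&demo_srcu, idx);
	return val;
}

/* Update side: holds demo_lock, so pass that fact to lockdep. */
static struct demo_cfg *demo_replace(struct demo_cfg *newp)
{
	struct demo_cfg *old;

	old = srcu_dereference_check(demo_cfg, &demo_srcu,
				     lockdep_is_held(&demo_lock));
	rcu_assign_pointer(demo_cfg, newp);
	return old;	/* free only after synchronize_srcu(&demo_srcu) */
}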
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..6c79e851521c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the * CONFIG_PROVE_RCU and not cases. Note that if someone uses * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) - * will show the situation. + * will show the situation. This is useful for debug checks in functions + * that require that they be called within an RCU read-side critical + * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca0..12465f2ef766 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY disabling, allowing multiple RCU-lockdep warnings to be printed on a single reboot. +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + Say N if you are unsure. config LOCKDEP -- cgit v1.2.3 From 67bdbffd696f29a0b68aa8daa285783a06651583 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 16:55:13 +0100 Subject: rculist: avoid __rcu annotations This avoids warnings from missing __rcu annotations in the rculist implementation, making it possible to use the same lists in both RCU and non-RCU cases. We can add rculist annotations later, together with lockdep support for rculist, which is missing as well, but that may involve changing all the users. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Reviewed-by: Josh Triplett --- include/linux/rculist.h | 53 +++++++++++++++++++++++++++---------------- include/linux/rculist_nulls.h | 16 +++++++++---- kernel/pid.c | 2 +- 3 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..c10b1050dbe6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,12 @@ #include #include +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + /* * Insert a new entry between two known consecutive entries. 
* @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. 
@@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. 
* */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..0f90c2f713f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), rcu_read_lock_held() || lockdep_tasklist_lock_is_held()); if (first) -- cgit v1.2.3 From 2c392b8c3450ceb69ba1b93cb0cddb3998fb8cdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:41:39 +0100 Subject: cgroups: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Paul Menage Cc: Li Zefan Reviewed-by: Josh Triplett --- include/linux/cgroup.h | 4 ++-- include/linux/sched.h | 2 +- kernel/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..3cb7d04308cd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..bbffd087476c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1418,7 +1418,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..e5c5497a7dca 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ -- cgit v1.2.3 From 1b0ba1c9037b2265d6e5d0165d31e4c0269b603b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:45:09 +0100 Subject: credentials: rcu annotation Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bbffd087476c..2c756666c111 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1288,9 +1288,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations -- cgit v1.2.3 From e63ba744a64d234c8a07c469ab1806443cb0a6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 18:01:20 +0100 Subject: keys: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/cred.h | 2 +- include/linux/key.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; -- cgit v1.2.3 From 5b22216e11f717adc344abc7f97b42e03127c6c0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Mar 2010 10:20:10 +0100 Subject: nfs: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Acked-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/sunrpc/auth_gss.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; -- cgit v1.2.3 From 2be85279281bafe7de808ca99de59af4fd474c49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:50:28 +0100 Subject: input: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Dmitry Torokhov Acked-by: Dmitry Torokhov Reviewed-by: Josh Triplett --- drivers/input/evdev.c | 2 +- include/linux/input.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..5808731f72d2 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1196,7 +1196,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; -- cgit v1.2.3 From 0906a372f2aa0fec1e59bd12b896883b6e41307a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Mar 2010 20:59:15 +0100 Subject: net/netfilter: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Patrick McHardy Cc: "David S. 
Miller" Cc: Eric Dumazet Reviewed-by: Josh Triplett --- include/net/netfilter/nf_conntrack.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nf_conntrack_ecache.c | 4 ++-- net/netfilter/nf_conntrack_extend.c | 2 +- net/netfilter/nf_conntrack_proto.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netfilter/nf_queue.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d9b93c..957c9241fb0c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; #define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; static inline const struct nf_nat_protocol * diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d33bfb..fdaec7daff1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc7649476b..5702de35e2bb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,10 +26,10 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a404190..1d9bdae06161 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -16,7 +16,7 @@ #include #include -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); void __nf_ct_ext_destroy(struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1d52a0..ed6d92958023 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include #include -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto 
__rcu *nf_ct_l3protos[AF_MAX] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd786bc..b07393eab88e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,7 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9c519c..74aebed5bd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -18,7 +18,7 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); -- cgit v1.2.3 From 4b6a2872a2a00042ee50024822ab706e5456aad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:59:23 +0100 Subject: kvm: add __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Avi Kivity Cc: Marcelo Tosatti Reviewed-by: Josh Triplett --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -205,7 +205,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif -- cgit v1.2.3 From 4221a9918e38b7494cee341dda7b7b4bb8c04bde Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 Jun 2010 01:08:19 +0900 Subject: Add RCU check for find_task_by_vpid(). find_task_by_vpid() says "Must be called under rcu_read_lock().". But due to commit 3120438 "rcu: Disable lockdep checking in RCU list-traversal primitives", we are currently unable to catch "find_task_by_vpid() with tasklist_lock held but RCU lock not held" errors due to the RCU-lockdep checks being suppressed in the RCU variants of the struct list_head traversals. This commit therefore places an explicit check for being in an RCU read-side critical section in find_task_by_pid_ns(). =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:386 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by rc.sysinit/1102: #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x40/0x160 stack backtrace: Pid: 1102, comm: rc.sysinit Not tainted 2.6.35-rc3-dirty #1 Call Trace: [] lockdep_rcu_dereference+0x94/0xb0 [] find_task_by_pid_ns+0x6d/0x70 [] find_task_by_vpid+0x18/0x20 [] sys_setpgid+0x47/0x160 [] sysenter_do_call+0x12/0x36 Commit updated to use a new rcu_lockdep_assert() exported API rather than the old internal __do_rcu_dereference(). Signed-off-by: Tetsuo Handa Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 14 +++++++++----- kernel/pid.c | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b973dea2d6b0..b124bc6a75ad 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -215,7 +215,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -226,7 +230,7 @@ extern int rcu_my_thread_group_empty(void); #else /* #ifdef CONFIG_PROVE_RCU */ -#define __do_rcu_dereference_check(c) do { } while (0) +#define rcu_lockdep_assert(c) do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -247,14 +251,14 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -262,7 +266,7 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ smp_read_barrier_depends(); \ (_________p1); \ }) diff --git a/kernel/pid.c b/kernel/pid.c index 0f90c2f713f1..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { + rcu_lockdep_assert(rcu_read_lock_held()); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } -- cgit v1.2.3 From 77d8485a8b5416c615b6acd95f01bfcacd7d81ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2010 17:38:59 -0700 Subject: rcu: improve kerneldoc for rcu_read_lock(), call_rcu(), and synchronize_rcu() Make it explicit that new RCU read-side critical sections that start after call_rcu() and synchronize_rcu() start might still be running after the end of the relevant grace period. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 16 +++++++++------- kernel/rcutree_plugin.h | 8 +++++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b124bc6a75ad..3e1b6625553b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -450,7 +450,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. 
One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -608,11 +608,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ @@ -622,9 +624,9 @@ extern void call_rcu(struct rcu_head *head, /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 9906f85c7780..63bb7714fdeb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -546,9 +546,11 @@ EXPORT_SYMBOL_GPL(call_rcu); * * Control will return to the caller some time after a full grace * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. + * read-side critical sections have completed. Note, however, that + * upon return from synchronize_rcu(), the caller might well be executing + * concurrently with new RCU read-side critical sections that began while + * synchronize_rcu() was waiting. RCU read-side critical sections are + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. */ void synchronize_rcu(void) { -- cgit v1.2.3 From 374a8e0dc33c984fac284de7d57d77af3cfdbfb7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 20:00:13 +0100 Subject: notifiers: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Alan Cox Reviewed-by: Josh Triplett --- include/linux/notifier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ -- cgit v1.2.3 From a1115570b31091f3e3ab9e6cf7ee8d320a42be84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 23:43:52 +0100 Subject: radix-tree: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Nick Piggin Reviewed-by: Josh Triplett --- include/linux/radix-tree.h | 4 +++- lib/radix-tree.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858498a6..899fb750946f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -49,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -- cgit v1.2.3 From d2c2486bc8e185548490e8edbc84d185de9eaff1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 14:53:26 +0100 Subject: idr: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Manfred Spraul Reviewed-by: Josh Triplett --- include/linux/idr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< Date: Wed, 24 Feb 2010 20:01:56 +0100 Subject: kernel: __rcu annotations This adds annotations for RCU operations in core kernel components Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Al Viro Cc: Jens Axboe Cc: Andrew Morton Reviewed-by: Josh Triplett --- include/linux/fdtable.h | 6 +++--- include/linux/fs.h | 2 +- include/linux/genhd.h | 6 +++--- include/linux/init_task.h | 4 ++-- include/linux/iocontext.h | 2 +- include/linux/mm_types.h | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..aa3dc8d20436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1380,7 +1380,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..af3f06b41dc1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. 
*/ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..6460fc65ed6b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -137,8 +137,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -53,7 +53,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..05537a5eb855 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 5e8067adfdbaf97039a97540765b1e16eb8d61cc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:41 -0400 Subject: rcu head remove init RCU heads really don't need to be initialized. Their state before call_rcu() really does not matter. We need to keep init/destroy_rcu_head_on_stack() though, since we want debugobjects to be able to keep track of these objects. Signed-off-by: Alexey Dobriyan Signed-off-by: Mathieu Desnoyers CC: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: tglx@linutronix.de CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3e1b6625553b..27b44b3e3024 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -75,12 +75,6 @@ extern void rcu_init(void); #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures -- cgit v1.2.3 From 1495cc9df4e81f5a8fa9b0b8f1034b14d24b7d8c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:46 -0700 Subject: Input: sysrq - drop tty argument from sysrq ops handlers No one is using the tty argument, so let's get rid of it.
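[Editor's note: for illustration, a minimal handler written against the new single-argument signature might look like the sketch below. The handler name, key choice and messages are hypothetical; sysrq_key_op, register_sysrq_key() and SYSRQ_ENABLE_DUMP are the interfaces this patch actually touches.]

#include <linux/kernel.h>
#include <linux/sysrq.h>

/* A sysrq callback now receives only the key that triggered it. */
static void sysrq_handle_example(int key)
{
	printk(KERN_INFO "sysrq: example handler for key %d\n", key);
}

static struct sysrq_key_op sysrq_example_op = {
	.handler	= sysrq_handle_example,
	.help_msg	= "example(y)",
	.action_msg	= "Running example handler",
	.enable_mask	= SYSRQ_ENABLE_DUMP,
};

/* Registration itself is unchanged: register_sysrq_key('y', &sysrq_example_op); */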
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/arm/kernel/etm.c | 2 +- arch/powerpc/xmon/xmon.c | 5 ++--- arch/sparc/kernel/process_64.c | 2 +- drivers/char/sysrq.c | 42 ++++++++++++++++++++--------------------- drivers/gpu/drm/drm_fb_helper.c | 2 +- drivers/net/ibm_newemac/debug.c | 2 +- include/linux/sysrq.h | 6 +++++- kernel/debug/debug_core.c | 2 +- kernel/power/poweroff.c | 2 +- 9 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 56418f98cd01..33c7077174db 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -230,7 +230,7 @@ static void etm_dump(void) etb_lock(t); } -static void sysrq_etm_dump(int key, struct tty_struct *tty) +static void sysrq_etm_dump(int key) { dev_dbg(tracer.dev, "Dumping ETB buffer\n"); etm_dump(); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0554445200bf..d17d04cfb2cd 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2880,15 +2880,14 @@ static void xmon_init(int enable) } #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_xmon(int key, struct tty_struct *tty) +static void sysrq_handle_xmon(int key) { /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); } -static struct sysrq_key_op sysrq_xmon_op = -{ +static struct sysrq_key_op sysrq_xmon_op = { .handler = sysrq_handle_xmon, .help_msg = "Xmon", .action_msg = "Entering xmon", diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..25b01b43b40d 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -303,7 +303,7 @@ void arch_trigger_all_cpu_backtrace(void) #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_globreg(int key, struct tty_struct *tty) +static void sysrq_handle_globreg(int key) { arch_trigger_all_cpu_backtrace(); } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 878ac0c2cc68..a892a3c249dd 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -76,7 +76,7 @@ static int __init sysrq_always_enabled_setup(char *str) __setup("sysrq_always_enabled", sysrq_always_enabled_setup); -static void sysrq_handle_loglevel(int key, struct tty_struct *tty) +static void sysrq_handle_loglevel(int key) { int i; @@ -93,7 +93,7 @@ static struct sysrq_key_op sysrq_loglevel_op = { }; #ifdef CONFIG_VT -static void sysrq_handle_SAK(int key, struct tty_struct *tty) +static void sysrq_handle_SAK(int key) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); @@ -109,7 +109,7 @@ static struct sysrq_key_op sysrq_SAK_op = { #endif #ifdef CONFIG_VT -static void sysrq_handle_unraw(int key, struct tty_struct *tty) +static void sysrq_handle_unraw(int key) { struct kbd_struct *kbd = &kbd_table[fg_console]; @@ -126,7 +126,7 @@ static struct sysrq_key_op sysrq_unraw_op = { #define sysrq_unraw_op (*(struct sysrq_key_op *)NULL) #endif /* CONFIG_VT */ -static void sysrq_handle_crash(int key, struct tty_struct *tty) +static void sysrq_handle_crash(int key) { char *killer = NULL; @@ -141,7 +141,7 @@ static struct sysrq_key_op sysrq_crash_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_reboot(int key, struct tty_struct *tty) +static void sysrq_handle_reboot(int key) { lockdep_off(); local_irq_enable(); @@ -154,7 +154,7 @@ static struct sysrq_key_op sysrq_reboot_op = { .enable_mask = SYSRQ_ENABLE_BOOT, }; -static void sysrq_handle_sync(int key, struct tty_struct *tty) 
+static void sysrq_handle_sync(int key) { emergency_sync(); } @@ -165,7 +165,7 @@ static struct sysrq_key_op sysrq_sync_op = { .enable_mask = SYSRQ_ENABLE_SYNC, }; -static void sysrq_handle_show_timers(int key, struct tty_struct *tty) +static void sysrq_handle_show_timers(int key) { sysrq_timer_list_show(); } @@ -176,7 +176,7 @@ static struct sysrq_key_op sysrq_show_timers_op = { .action_msg = "Show clockevent devices & pending hrtimers (no others)", }; -static void sysrq_handle_mountro(int key, struct tty_struct *tty) +static void sysrq_handle_mountro(int key) { emergency_remount(); } @@ -188,7 +188,7 @@ static struct sysrq_key_op sysrq_mountro_op = { }; #ifdef CONFIG_LOCKDEP -static void sysrq_handle_showlocks(int key, struct tty_struct *tty) +static void sysrq_handle_showlocks(int key) { debug_show_all_locks(); } @@ -226,7 +226,7 @@ static void sysrq_showregs_othercpus(struct work_struct *dummy) static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); -static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) +static void sysrq_handle_showallcpus(int key) { /* * Fall back to the workqueue based printing if the @@ -252,7 +252,7 @@ static struct sysrq_key_op sysrq_showallcpus_op = { }; #endif -static void sysrq_handle_showregs(int key, struct tty_struct *tty) +static void sysrq_handle_showregs(int key) { struct pt_regs *regs = get_irq_regs(); if (regs) @@ -266,7 +266,7 @@ static struct sysrq_key_op sysrq_showregs_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate(int key, struct tty_struct *tty) +static void sysrq_handle_showstate(int key) { show_state(); } @@ -277,7 +277,7 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +static void sysrq_handle_showstate_blocked(int key) { show_state_filter(TASK_UNINTERRUPTIBLE); } @@ -291,7 +291,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { #ifdef CONFIG_TRACING #include -static void sysrq_ftrace_dump(int key, struct tty_struct *tty) +static void sysrq_ftrace_dump(int key) { ftrace_dump(DUMP_ALL); } @@ -305,7 +305,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { #define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL) #endif -static void sysrq_handle_showmem(int key, struct tty_struct *tty) +static void sysrq_handle_showmem(int key) { show_mem(); } @@ -330,7 +330,7 @@ static void send_sig_all(int sig) } } -static void sysrq_handle_term(int key, struct tty_struct *tty) +static void sysrq_handle_term(int key) { send_sig_all(SIGTERM); console_loglevel = 8; @@ -349,7 +349,7 @@ static void moom_callback(struct work_struct *ignored) static DECLARE_WORK(moom_work, moom_callback); -static void sysrq_handle_moom(int key, struct tty_struct *tty) +static void sysrq_handle_moom(int key) { schedule_work(&moom_work); } @@ -361,7 +361,7 @@ static struct sysrq_key_op sysrq_moom_op = { }; #ifdef CONFIG_BLOCK -static void sysrq_handle_thaw(int key, struct tty_struct *tty) +static void sysrq_handle_thaw(int key) { emergency_thaw_all(); } @@ -373,7 +373,7 @@ static struct sysrq_key_op sysrq_thaw_op = { }; #endif -static void sysrq_handle_kill(int key, struct tty_struct *tty) +static void sysrq_handle_kill(int key) { send_sig_all(SIGKILL); console_loglevel = 8; @@ -385,7 +385,7 @@ static struct sysrq_key_op sysrq_kill_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; -static void sysrq_handle_unrt(int key, struct tty_struct *tty) +static void sysrq_handle_unrt(int key) { 
normalize_rt_tasks(); } @@ -520,7 +520,7 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { printk("%s\n", op_p->action_msg); console_loglevel = orig_log_level; - op_p->handler(key, tty); + op_p->handler(key); } else { printk("This sysrq operation is disabled.\n"); } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index de82e201d682..5efd6d6742ec 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -369,7 +369,7 @@ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) } static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); -static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +static void drm_fb_helper_sysrq(int dummy1) { schedule_work(&drm_fb_helper_restore_work); } diff --git a/drivers/net/ibm_newemac/debug.c b/drivers/net/ibm_newemac/debug.c index 3995fafc1e08..8c6c1e2a8750 100644 --- a/drivers/net/ibm_newemac/debug.c +++ b/drivers/net/ibm_newemac/debug.c @@ -238,7 +238,7 @@ void emac_dbg_dump_all(void) } #if defined(CONFIG_MAGIC_SYSRQ) -static void emac_sysrq_handler(int key, struct tty_struct *tty) +static void emac_sysrq_handler(int key) { emac_dbg_dump_all(); } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 609e8ca5f534..4ee650315119 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -31,7 +31,7 @@ struct tty_struct; #define SYSRQ_ENABLE_RTNICE 0x0100 struct sysrq_key_op { - void (*handler)(int, struct tty_struct *); + void (*handler)(int); char *help_msg; char *action_msg; int enable_mask; @@ -58,6 +58,10 @@ static inline void handle_sysrq(int key, struct tty_struct *tty) { } +static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +{ +} + static inline int register_sysrq_key(int key, struct sysrq_key_op *op) { return -EINVAL; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 3c2d4972d235..de407c78178d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -741,7 +741,7 @@ static struct console kgdbcons = { }; #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_dbg(int key, struct tty_struct *tty) +static void sysrq_handle_dbg(int key) { if (!dbg_io_ops) { printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index e8b337006276..d52359374e85 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -24,7 +24,7 @@ static void do_poweroff(struct work_struct *dummy) static DECLARE_WORK(poweroff_work, do_poweroff); -static void handle_poweroff(int key, struct tty_struct *tty) +static void handle_poweroff(int key) { /* run sysrq poweroff on boot cpu */ schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); -- cgit v1.2.3 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: Grégoire Baron Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packet checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It is possible to request, in a single action, updates to several kinds of checksums, e.g. when the packet flow mixes TCP, UDP and UDPLite.
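[Editor's note: a hedged sketch of how a netlink client might fill in the parameter block that tcf_csum_init() below parses out of the TCA_CSUM_PARMS attribute. The chosen flags and the TC_ACT_OK disposition are illustrative; struct tc_csum and the TCA_CSUM_UPDATE_FLAG_* bits are the ones added by this patch.]

#include <linux/pkt_cls.h>
#include <linux/tc_act/tc_csum.h>

/* Recompute the IPv4 header, TCP and UDP checksums, then let the
 * packet continue through the remaining actions. */
static const struct tc_csum csum_parm = {
	.action		= TC_ACT_OK,	/* field provided by tc_gen */
	.update_flags	= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
			  TCA_CSUM_UPDATE_FLAG_TCP |
			  TCA_CSUM_UPDATE_FLAG_UDP,
};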
An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_csum.h | 32 +++ include/net/tc_act/tc_csum.h | 15 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/act_csum.c | 595 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 654 insertions(+) create mode 100644 include/linux/tc_act/tc_csum.h create mode 100644 include/net/tc_act/tc_csum.h create mode 100644 net/sched/act_csum.c (limited to 'include') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c9..67b501c302b2 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 000000000000..a047c49a3153 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include +#include + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 000000000000..9e8710be7a04 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2f691fb180d1..522d5a9a2825 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT To compile this code as a module, choose M here: the module will be called act_skbedit. +config NET_ACT_CSUM + tristate "Checksum Updating" + depends on NET_CLS_ACT + ---help--- + Say Y here to update some common checksum after some direct + packet alterations. + + To compile this code as a module, choose M here: the + module will be called act_csum. 
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index f14e71bfa58f..960f5dba6304 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o +obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c new file mode 100644 index 000000000000..58d7f36949da --- /dev/null +++ b/net/sched/act_csum.c @@ -0,0 +1,595 @@ +/* + * Checksum updating actions + * + * Copyright (c) 2010 Gregoire Baron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define CSUM_TAB_MASK 15 +static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; +static u32 csum_idx_gen; +static DEFINE_RWLOCK(csum_lock); + +static struct tcf_hashinfo csum_hash_info = { + .htab = tcf_csum_ht, + .hmask = CSUM_TAB_MASK, + .lock = &csum_lock, +}; + +static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { + [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, +}; + +static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tc_csum *parm; + struct tcf_common *pc; + struct tcf_csum *p; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); + if (err < 0) + return err; + + if (tb[TCA_CSUM_PARMS] == NULL) + return -EINVAL; + parm = nla_data(tb[TCA_CSUM_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &csum_idx_gen, &csum_hash_info); + if (IS_ERR(pc)) + return PTR_ERR(pc); + p = to_tcf_csum(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_csum(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &csum_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->tcf_action = parm->action; + p->update_flags = parm->update_flags; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &csum_hash_info); + + return ret; +} + +static int tcf_csum_cleanup(struct tc_action *a, int bind) +{ + struct tcf_csum *p = a->priv; + return tcf_hash_release(&p->common, bind, &csum_hash_info); +} + +/** + * tcf_csum_skb_nextlayer - Get next layer pointer + * @skb: sk_buff to use + * @ihl: previous summed headers length + * @ipl: complete packet length + * @jhl: next header length + * + * Check the expected next layer availability in the specified sk_buff. + * Return the next layer pointer if pass, NULL otherwise. 
+ */ +static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl, + unsigned int jhl) +{ + int ntkoff = skb_network_offset(skb); + int hl = ihl + jhl; + + if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || + (skb_cloned(skb) && + !skb_clone_writable(skb, hl + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return NULL; + else + return (void *)(skb_network_header(skb) + ihl); +} + +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct icmphdr *icmph; + + icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); + if (icmph == NULL) + return 0; + + icmph->checksum = 0; + skb->csum = csum_partial(icmph, ipl - ihl, 0); + icmph->checksum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_igmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct igmphdr *igmph; + + igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); + if (igmph == NULL) + return 0; + + igmph->csum = 0; + skb->csum = csum_partial(igmph, ipl - ihl, 0); + igmph->csum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct icmp6hdr *icmp6h; + + icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); + if (icmp6h == NULL) + return 0; + + icmp6h->icmp6_cksum = 0; + skb->csum = csum_partial(icmp6h, ipl - ihl, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_ICMPV6, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = tcp_v4_check(ipl - ihl, + iph->saddr, iph->daddr, skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_TCP, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use iph->tot_len, or just ipl. 
+ */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + if (udplite || udph->check) { + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + ul, iph->protocol, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, + udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) +{ + struct iphdr *iph; + int ntkoff; + + ntkoff = skb_network_offset(skb); + + if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) + goto fail; + + iph = ip_hdr(skb); + + switch (iph->frag_off & htons(IP_OFFSET) ? 
0 : iph->protocol) { + case IPPROTO_ICMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv4_icmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_IGMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) + if (!tcf_csum_ipv4_igmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv4_tcp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 0)) + goto fail; + break; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 1)) + goto fail; + break; + } + + if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto fail; + + ip_send_check(iph); + } + + return 1; + +fail: + return 0; +} + +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, + unsigned int ixhl, unsigned int *pl) +{ + int off, len, optlen; + unsigned char *xh = (void *)ip6xh; + + off = sizeof(*ip6xh); + len = ixhl - off; + + while (len > 1) { + switch (xh[off]) + { + case IPV6_TLV_PAD0: + optlen = 1; + break; + case IPV6_TLV_JUMBO: + optlen = xh[off + 1] + 2; + if (optlen != 6 || len < 6 || (off & 3) != 2) + /* wrong jumbo option length/alignment */ + return 0; + *pl = ntohl(*(__be32 *)(xh + off + 2)); + goto done; + default: + optlen = xh[off + 1] + 2; + if (optlen > len) + /* ignore obscure options */ + goto done; + break; + } + off += optlen; + len -= optlen; + } + +done: + return 1; +} + +static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) +{ + struct ipv6hdr *ip6h; + struct ipv6_opt_hdr *ip6xh; + unsigned int hl, ixhl; + unsigned int pl; + int ntkoff; + u8 nexthdr; + + ntkoff = skb_network_offset(skb); + + hl = sizeof(*ip6h); + + if (!pskb_may_pull(skb, hl + ntkoff)) + goto fail; + + ip6h = ipv6_hdr(skb); + + pl = ntohs(ip6h->payload_len); + nexthdr = ip6h->nexthdr; + + do { + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + goto ignore_skb; + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) + goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); + ixhl = ipv6_optlen(ip6xh); + if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) + goto fail; + if ((nexthdr == NEXTHDR_HOP) && + !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) + goto fail; + nexthdr = ip6xh->nexthdr; + hl += ixhl; + break; + case IPPROTO_ICMPV6: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv6_icmp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv6_tcp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 0)) + goto fail; + goto done; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 1)) + goto fail; + goto done; + default: + goto ignore_skb; + } + } while (pskb_may_pull(skb, hl + 1 + ntkoff)); + +done: +ignore_skb: + return 1; + +fail: + return 0; +} + +static int tcf_csum(struct sk_buff *skb, + struct tc_action *a, struct tcf_result 
*res) +{ + struct tcf_csum *p = a->priv; + int action; + u32 update_flags; + + spin_lock(&p->tcf_lock); + p->tcf_tm.lastuse = jiffies; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); + p->tcf_bstats.packets++; + action = p->tcf_action; + update_flags = p->update_flags; + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (!tcf_csum_ipv4(skb, update_flags)) + goto drop; + break; + case cpu_to_be16(ETH_P_IPV6): + if (!tcf_csum_ipv6(skb, update_flags)) + goto drop; + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_csum_dump(struct sk_buff *skb, + struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_csum *p = a->priv; + struct tc_csum opt = { + .update_flags = p->update_flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; + struct tcf_t t; + + NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_csum_ops = { + .kind = "csum", + .hinfo = &csum_hash_info, + .type = TCA_ACT_CSUM, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_csum, + .dump = tcf_csum_dump, + .cleanup = tcf_csum_cleanup, + .lookup = tcf_hash_search, + .init = tcf_csum_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Checksum updating actions"); +MODULE_LICENSE("GPL"); + +static int __init csum_init_module(void) +{ + return tcf_register_action(&act_csum_ops); +} + +static void __exit csum_cleanup_module(void) +{ + tcf_unregister_action(&act_csum_ops); +} + +module_init(csum_init_module); +module_exit(csum_cleanup_module); -- cgit v1.2.3 From a57eb940d130477a799dfb24a570ee04979c0f7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Jun 2010 16:49:16 -0700 Subject: rcu: Add a TINY_PREEMPT_RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers. The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU. This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience. Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183). As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better. 
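For reference, the reader/updater pattern that any preemptible-RCU implementation must support is sketched below. This is a minimal illustration only: the "foo" structure, its updater, and all names are hypothetical and not part of this patch.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
	struct rcu_head rcu;
};
static struct foo *global_foo;	/* assumed initialized before readers run */

static int foo_get_a(void)
{
	int a;

	rcu_read_lock();	/* under TINY_PREEMPT_RCU the reader may be preempted here */
	a = rcu_dereference(global_foo)->a;
	rcu_read_unlock();	/* outermost unlock may invoke rcu_read_unlock_special() */
	return a;
}

static void foo_reclaim(struct rcu_head *rp)
{
	kfree(container_of(rp, struct foo, rcu));
}

static int foo_update_a(int new_a)	/* caller must serialize updaters */
{
	struct foo *newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	struct foo *oldp;

	if (newp == NULL)
		return -ENOMEM;
	newp->a = new_a;
	oldp = global_foo;
	rcu_assign_pointer(global_foo, newp);
	call_rcu(&oldp->rcu, foo_reclaim);	/* queued at ->nexttail in this implementation */
	return 0;
}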
CONFIG_TREE_PREEMPT_RCU

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
   6170     825      28    7023  kernel/rcutree.o
                           ----
                           7036 Total

CONFIG_TINY_PREEMPT_RCU

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
   2081      81       8    2170  kernel/rcutiny.o
                           ----
                           2183 Total

CONFIG_TINY_RCU (non-preemptible)

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
    719      25       0     744  kernel/rcutiny.o
                            ---
                            757 Total

Requested-by: Loïc Minier Signed-off-by: Paul E. McKenney --- include/linux/hardirq.h | 2 +- include/linux/init_task.h | 10 +- include/linux/rcupdate.h | 3 +- include/linux/rcutiny.h | 126 +++++++--- include/linux/rcutree.h | 2 + include/linux/sched.h | 10 +- init/Kconfig | 16 +- kernel/Makefile | 1 + kernel/rcutiny.c | 33 ++- kernel/rcutiny_plugin.h | 582 +++++++++++++++++++++++++++++++++++++++++++++- 10 files changed, 717 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..1f4517d55b19 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6460fc65ed6b..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -82,11 +82,17 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_FULL_SET #ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_TREE_PREEMPT() \ + .rcu_blocked_node = NULL, +#else +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) +#endif +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_node = NULL, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 27b44b3e3024..24b896649384 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -58,7 +58,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); @@ -69,7 +68,7 @@ extern void rcu_init(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..4cc5eba41616 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,66 +29,51 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#ifdef CONFIG_TINY_RCU #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() +#else /* #ifdef CONFIG_TINY_RCU */ +void __rcu_read_lock(void); +void __rcu_read_unlock(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ #define __rcu_read_lock_bh() local_bh_disable() #define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct
rcu_head *rcu)); #define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -static inline int rcu_needs_cpu(int cpu) -{ - return 0; -} +extern void synchronize_sched(void); -/* - * Return the number of grace periods. - */ -static inline long rcu_batches_completed(void) -{ - return 0; -} +#ifdef CONFIG_TINY_RCU -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) -{ - return 0; -} +#define call_rcu call_rcu_sched -static inline void rcu_force_quiescent_state(void) +static inline void synchronize_rcu(void) { + synchronize_sched(); } -static inline void rcu_bh_force_quiescent_state(void) +static inline void synchronize_rcu_expedited(void) { + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ } -static inline void rcu_sched_force_quiescent_state(void) +static inline void rcu_barrier(void) { + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} +void synchronize_rcu(void); +void rcu_barrier(void); +void synchronize_rcu_expedited(void); -static inline void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} +#endif /* #else #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu_expedited(void) +static inline void synchronize_rcu_bh(void) { synchronize_sched(); } @@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline void exit_rcu(void) { } +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + static inline int rcu_preempt_depth(void) { return 0; } +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); + +static inline int rcu_needs_cpu(int cpu) +{ + return rcu_preempt_needs_cpu(); +} + +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + * FIXME: combine with include/linux/rcutree.h into rcupdate.h. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); +} + +extern void rcu_check_callbacks(int cpu, int user); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. 
+ */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..c13b85dd22bc 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } +extern void rcu_barrier(void); + extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c756666c111..e18473f0eb78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,11 +1202,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ @@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index dbc08baad77e..a619a1ac7f4c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -348,7 +348,7 @@ config TREE_RCU smaller systems. config TREE_PREEMPT_RCU - bool "Preemptable tree-based hierarchical RCU" + bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT help This option selects the RCU implementation that is @@ -366,8 +366,22 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. +config TINY_PREEMPT_RCU + bool "Preemptible UP-only small-memory-footprint RCU" + depends on !SMP && PREEMPT + help + This option selects the RCU implementation that is designed + for real-time UP systems. This option greatly reduces the + memory footprint of RCU. + endchoice +config PREEMPT_RCU + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + help + This option enables preemptible-RCU code that is common between + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 
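One payoff of this umbrella symbol is that code shared by the two preemptible flavors can test a single Kconfig option. A minimal sketch of the pattern, mirroring what a later commit in this series does in include/linux/rcupdate.h:

/* Sketch only: common code keys off CONFIG_PREEMPT_RCU rather than
 * enumerating TREE_PREEMPT_RCU and TINY_PREEMPT_RCU separately. */
#ifdef CONFIG_PREEMPT_RCU
extern void __rcu_read_lock(void);
extern void __rcu_read_unlock(void);
#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
#else /* #ifdef CONFIG_PREEMPT_RCU */
static inline void __rcu_read_lock(void)
{
	preempt_disable();
}
static inline void __rcu_read_unlock(void)
{
	preempt_enable();
}
static inline int rcu_preempt_depth(void)
{
	return 0;
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */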
+ config RCU_TRACE bool "Enable tracing for RCU" depends on TREE_RCU || TREE_PREEMPT_RCU diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be0..17046b6e7c90 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_TINY_RCU) += rcutiny.o +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Forward declarations for rcutiny_plugin.h. */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp); + +#include "rcutiny_plugin.h" + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + rcu_preempt_check_callbacks(); } /* @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) *rcp->donetail = NULL; if (rcp->curtail == rcp->donetail) rcp->curtail = &rcp->rcucblist; + rcu_preempt_remove_callbacks(rcp); rcp->donetail = &rcp->rcucblist; local_irq_restore(flags); @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, } /* - * Post an RCU callback to be invoked after the end of an RCU grace + * Post an RCU callback to be invoked after the end of an RCU-sched grace * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_sched_ctrlblk); } -EXPORT_SYMBOL_GPL(call_rcu); +EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Post an RCU bottom-half callback to be invoked after any subsequent @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); -void rcu_barrier(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier); - void rcu_barrier_bh(void) { struct rcu_synchronize rcu; @@ -289,5 +286,3 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } - -#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..e6bc1b447c6c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -1,7 +1,7 @@ /* - * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition * Internal non-public definitions that provide either classic - * or preemptable semantics. 
+ * or preemptible semantics. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,11 +17,587 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright IBM Corporation, 2009 + * Copyright (c) 2010 Linaro * * Author: Paul E. McKenney */ +#ifdef CONFIG_TINY_PREEMPT_RCU + +#include + +/* FIXME: merge with definitions in kernel/rcutree.h. */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + +/* Global control variables for preemptible RCU. */ +struct rcu_preempt_ctrlblk { + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ + struct rcu_head **nexttail; + /* Tasks blocked in a preemptible RCU */ + /* read-side critical section while an */ + /* preemptible-RCU grace period is in */ + /* progress must wait for a later grace */ + /* period. This pointer points to the */ + /* ->next pointer of the last task that */ + /* must wait for a later grace period, or */ + /* to &->rcb.rcucblist if there is no */ + /* such task. */ + struct list_head blkd_tasks; + /* Tasks blocked in RCU read-side critical */ + /* section. Tasks are placed at the head */ + /* of this list and age towards the tail. */ + struct list_head *gp_tasks; + /* Pointer to the first task blocking the */ + /* current grace period, or NULL if there */ + /* is not such task. */ + struct list_head *exp_tasks; + /* Pointer to first task blocking the */ + /* current expedited grace period, or NULL */ + /* if there is no such task. If there */ + /* is no current expedited grace period, */ + /* then there cannot be any such task. */ + u8 gpnum; /* Current grace period. */ + u8 gpcpu; /* Last grace period blocked by the CPU. */ + u8 completed; /* Last grace period completed. */ + /* If all three are equal, RCU is idle. */ +}; + +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), +}; + +static int rcu_preempted_readers_exp(void); +static void rcu_report_exp_done(void); + +/* + * Return true if the CPU has not yet responded to the current grace period. + */ +static int rcu_cpu_cur_gp(void) +{ + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Check for a running RCU reader. Because there is only one CPU, + * there can be but one running RCU reader at a time. ;-) + */ +static int rcu_preempt_running_reader(void) +{ + return current->rcu_read_lock_nesting; +} + +/* + * Check for preempted RCU readers blocking any grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_any(void) +{ + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); +} + +/* + * Check for preempted RCU readers blocking the current grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_cgp(void) +{ + return rcu_preempt_ctrlblk.gp_tasks != NULL; +} + +/* + * Return true if another preemptible-RCU grace period is needed. + */ +static int rcu_preempt_needs_another_gp(void) +{ + return *rcu_preempt_ctrlblk.rcb.curtail != NULL; +} + +/* + * Return true if a preemptible-RCU grace period is in progress. 
+ * The caller must disable hardirqs. + */ +static int rcu_preempt_gp_in_progress(void) +{ + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Record a preemptible-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. + * + * Because this is a single-CPU implementation, the only way a grace + * period can end is if the CPU is in a quiescent state. The reason is + * that a blocked preemptible-RCU reader can exit its critical section + * only if the CPU is running it at the time. Therefore, when the + * last task blocking the current grace period exits its RCU read-side + * critical section, neither the CPU nor blocked tasks will be stopping + * the current grace period. (In contrast, SMP implementations + * might have CPUs running in RCU read-side critical sections that + * block later grace periods -- but this is not possible given only + * one CPU.) + */ +static void rcu_preempt_cpu_qs(void) +{ + /* Record both CPU and task as having responded to current GP. */ + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + + /* + * If there is no GP, or if blocked readers are still blocking GP, + * then there is nothing more to do. + */ + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + return; + + /* Advance callbacks. */ + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; + + /* If there are no blocked readers, next GP is done instantly. */ + if (!rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; + + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Start a new RCU grace period if warranted. Hard irqs must be disabled. + */ +static void rcu_preempt_start_gp(void) +{ + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { + + /* Official start of GP. */ + rcu_preempt_ctrlblk.gpnum++; + + /* Any blocked RCU readers block new GP. */ + if (rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.gp_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + + /* If there is no running reader, CPU is done with GP. */ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + } +} + +/* + * We have entered the scheduler, and the current task might soon be + * context-switched away from. If this task is in an RCU read-side + * critical section, we will no longer be able to rely on the CPU to + * record that fact, so we enqueue the task on the blkd_tasks list. + * If the task started after the current grace period began, as recorded + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise + * before the element referenced by ->gp_tasks (or at the tail if + * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. + * The task will dequeue itself when it exits the outermost enclosing + * RCU read-side critical section. 
Therefore, the current grace period + * cannot be permitted to complete until the ->gp_tasks pointer becomes + * NULL. + * + * Caller must disable preemption. + */ +void rcu_preempt_note_context_switch(void) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); /* must exclude scheduler_tick(). */ + if (rcu_preempt_running_reader() && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); + if (rcu_cpu_cur_gp()) + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that current grace period continues to be blocked. + */ + rcu_preempt_cpu_qs(); + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + int empty_exp; + unsigned long flags; + struct list_head *np; + int special; + + /* + * NMI handlers cannot block and cannot safely manipulate state. + * They therefore cannot possibly be special, so just leave. + */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) + rcu_preempt_cpu_qs(); + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* + * Remove this task from the ->blkd_tasks list and adjust + * any pointers that might have been referencing it. 
+ */ + empty = !rcu_preempt_blocked_readers_cgp(); + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + list_del(&t->rcu_node_entry); + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) + rcu_preempt_ctrlblk.gp_tasks = np; + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) + rcu_preempt_ctrlblk.exp_tasks = np; + INIT_LIST_HEAD(&t->rcu_node_entry); + + /* + * If this was the last task on the current list, and if + * we aren't waiting on the CPU, report the quiescent state + * and start a new grace period if needed. + */ + if (!empty && !rcu_preempt_blocked_readers_cgp()) { + rcu_preempt_cpu_qs(); + rcu_preempt_start_gp(); + } + + /* + * If this was the last task on the expedited lists, + * then we need wake up the waiting task. + */ + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_report_exp_done(); + } + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the rcu_preempt_ctrlblk structure, which is + * checked elsewhere. This is called from the scheduling-clock interrupt. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(void) +{ + struct task_struct *t = current; + + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress()) + rcu_preempt_cpu_qs(); + if (&rcu_preempt_ctrlblk.rcb.rcucblist != + rcu_preempt_ctrlblk.rcb.donetail) + raise_softirq(RCU_SOFTIRQ); + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader()) + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; +} + +/* + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to + * update, so this is invoked from __rcu_process_callbacks() to + * handle that case. Of course, it is invoked for all flavors of + * RCU, but RCU callbacks can appear only on one of the lists, and + * neither ->nexttail nor ->donetail can possibly be NULL, so there + * is no need for an explicit check. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; +} + +/* + * Process callbacks for preemptible RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); +} + +/* + * Queue a preemptible -RCU callback for invocation after a grace period. 
+ */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcu_preempt_ctrlblk.nexttail = head; + rcu_preempt_ctrlblk.nexttail = &head->next; + rcu_preempt_start_gp(); /* checks to see if GP needed. */ + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(call_rcu); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_rcu_head_on_stack(&rcu.head); + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +/* + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!rcu_scheduler_active) + return; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + if (!rcu_preempt_blocked_readers_any()) + return; + + /* Once we get past the fastpath checks, same code as rcu_barrier(). */ + rcu_barrier(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static unsigned long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(void) +{ + return rcu_preempt_ctrlblk.exp_tasks != NULL; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. + */ +static void rcu_report_exp_done(void) +{ + wake_up(&sync_rcu_preempt_exp_wq); +} + +/* + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to rely in the fact that there is but one CPU, and that it is + * illegal for a task to invoke synchronize_rcu_expedited() while in a + * preemptible-RCU read-side critical section. Therefore, any such + * critical sections must correspond to blocked tasks, which must therefore + * be on the ->blkd_tasks list. So just record the current head of the + * list in the ->exp_tasks pointer, and wait for all tasks including and + * after the task pointed to by ->exp_tasks to drain. + */ +void synchronize_rcu_expedited(void) +{ + unsigned long flags; + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; + unsigned long snap; + + barrier(); /* ensure prior action seen before grace period. */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + + /* + * Acquire lock so that there is only one preemptible RCU grace + * period in flight. Of course, if someone does the expedited + * grace period for us while we are acquiring the lock, just leave. + */ + snap = sync_rcu_preempt_exp_count + 1; + mutex_lock(&sync_rcu_preempt_exp_mutex); + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) + goto unlock_mb_ret; /* Others did our work for us. 
*/ + + local_irq_save(flags); + + /* + * All RCU readers have to already be on blkd_tasks because + * we cannot legally be executing in an RCU read-side critical + * section. + */ + + /* Snapshot current head of ->blkd_tasks list. */ + rpcp->exp_tasks = rpcp->blkd_tasks.next; + if (rpcp->exp_tasks == &rpcp->blkd_tasks) + rpcp->exp_tasks = NULL; + local_irq_restore(flags); + + /* Wait for tail of ->blkd_tasks list to drain. */ + if (rcu_preempted_readers_exp()) + wait_event(sync_rcu_preempt_exp_wq, + !rcu_preempted_readers_exp()); + + /* Clean up and exit. */ + barrier(); /* ensure expedited GP seen before counter increment. */ + sync_rcu_preempt_exp_count++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); + barrier(); /* ensure subsequent action seen after grace period. */ +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + +/* + * Does preemptible RCU need the CPU to stay out of dynticks mode? + */ +int rcu_preempt_needs_cpu(void) +{ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; +} + +/* + * Check for a task exiting while in a preemptible -RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to check. + */ +static void rcu_preempt_check_callbacks(void) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to remove. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to process. + */ +static void rcu_preempt_process_callbacks(void) +{ +} + +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #include -- cgit v1.2.3 From 9079fd7c2e06a92cf27d05224a1f478581916c5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 7 Aug 2010 21:59:54 -0700 Subject: rcu: update obsolete rcu_read_lock() comment. The comment says that blocking is illegal in rcu_read_lock()-style RCU read-side critical sections, which is no longer entirely true given preemptible RCU. This commit provides a fix. Suggested-by: David Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 24b896649384..d7af96ef6fcf 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -458,7 +458,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. 
In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { -- cgit v1.2.3 From 53d84e004d5e8c018be395c4330dc72fd60bd13e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Aug 2010 14:28:53 -0700 Subject: rcu: permit suppressing current grace period's CPU stall warnings When using a kernel debugger, a long sojourn in the debugger can get you lots of RCU CPU stall warnings once you resume. This might not be helpful, especially if you are using the system console. This patch therefore allows RCU CPU stall warnings to be suppressed, but only for the duration of the current set of grace periods. This differs from Jason's original patch in that it adds support for tiny RCU and preemptible RCU, and uses a slightly different method for suppressing the RCU CPU stall warning messages. Signed-off-by: Jason Wessel Signed-off-by: Paul E. McKenney Tested-by: Jason Wessel --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + kernel/rcutree.c | 20 ++++++++++++++++++++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 18 ++++++++++++++++++ 5 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4cc5eba41616..3fa179784e18 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void) { } +static inline void rcu_cpu_stall_reset(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c13b85dd22bc..0726809497ba 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ff214118e4b8..42140a860bb9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -565,6 +565,22 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) return NOTIFY_DONE; } +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. 
+ */ +void rcu_cpu_stall_reset(void) +{ + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_preempt_stall_reset(); +} + static struct notifier_block rcu_panic_block = { .notifier_call = rcu_panic, }; @@ -584,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +void rcu_cpu_stall_reset(void) +{ +} + static void __init check_cpu_stall_init(void) { } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bb4d08695c45..7abd439a7573 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -372,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); +static void rcu_preempt_stall_reset(void); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63bb7714fdeb..561410f70d4a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -417,6 +417,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) } } +/* + * Suppress preemptible RCU's CPU stall warnings by pushing the + * time of the next stall-warning message comfortably far into the + * future. + */ +static void rcu_preempt_stall_reset(void) +{ + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -867,6 +877,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) { } +/* + * Because preemptible RCU does not exist, there is no need to suppress + * its CPU stall warnings. + */ +static void rcu_preempt_stall_reset(void) +{ +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* -- cgit v1.2.3 From a3dc3fb161f9b4066c0fce22db72638af8baf83b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Aug 2010 16:16:25 -0700 Subject: rcu: repair code-duplication FIXMEs Combine the duplicate definitions of ULONG_CMP_GE(), ULONG_CMP_LT(), and rcu_preempt_depth() into include/linux/rcupdate.h. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++++++ include/linux/rcutiny.h | 7 ------- include/linux/rcutree.h | 6 ------ kernel/rcutiny_plugin.h | 4 ---- kernel/rcutree.h | 3 --- 5 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7af96ef6fcf..325bad7bbca9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,9 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -66,6 +69,18 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); +#ifdef CONFIG_PREEMPT_RCU + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
+ */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #ifdef CONFIG_PREEMPT_RCU */ + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3fa179784e18..c6b11dc5ba0a 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -133,13 +133,6 @@ static inline int rcu_needs_cpu(int cpu) return rcu_preempt_needs_cpu(); } -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - * FIXME: combine with include/linux/rcutree.h into rcupdate.h. - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void rcu_note_context_switch(int cpu) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0726809497ba..54a20c11f98d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,12 +45,6 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index e6bc1b447c6c..c5bea1137dcb 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -26,10 +26,6 @@ #include -/* FIXME: merge with definitions in kernel/rcutree.h. */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* Global control variables for preemptible RCU. */ struct rcu_preempt_ctrlblk { struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7abd439a7573..7918ba61873f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -272,9 +272,6 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* * RCU global state, including node hierarchy. This hierarchy is * represented in "heap" form in a dense array. The root (first level) -- cgit v1.2.3 From 73d4da4d360136826b36f78f5cf72b29da82c8a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Aug 2010 10:50:54 -0700 Subject: rcu: Upgrade srcu_read_lock() docbook about SRCU grace periods It is illegal to wait for an SRCU grace period while within the corresponding flavor of SRCU read-side critical section. Therefore, this commit updates the srcu_read_lock() docbook accordingly. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6f456a720ff0..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -139,7 +139,12 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. + * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. 
Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { -- cgit v1.2.3 From 7b0b759b65247cbc66384a912be9acf8d4800636 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Aug 2010 14:18:46 -0700 Subject: rcu: combine duplicate code, courtesy of CONFIG_PREEMPT_RCU The CONFIG_PREEMPT_RCU kernel configuration parameter was recently re-introduced, but as an indication of the type of RCU (preemptible vs. non-preemptible) instead of as selecting a given implementation. This commit uses CONFIG_PREEMPT_RCU to combine duplicate code from include/linux/rcutiny.h and include/linux/rcutree.h into include/linux/rcupdate.h. This commit also combines a few other pieces of duplicate code that have accumulated. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/rcutiny.h | 51 -------------------------------- include/linux/rcutree.h | 50 -------------------------------- kernel/rcutree_plugin.h | 9 ------ 4 files changed, 72 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 325bad7bbca9..89414d67d961 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -61,16 +61,30 @@ struct rcu_head { }; /* Exported common interfaces */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); -/* Internal to kernel */ -extern void rcu_init(void); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} #ifdef CONFIG_PREEMPT_RCU +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. 
This gives the rcu_read_lock() @@ -79,7 +93,53 @@ extern void rcu_init(void); */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) -#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -626,6 +686,8 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -642,6 +704,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c6b11dc5ba0a..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,34 +27,10 @@ #include -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); - -#ifdef CONFIG_TINY_RCU -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#else /* #ifdef CONFIG_TINY_RCU */ -void __rcu_read_lock(void); -void __rcu_read_unlock(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); - #define rcu_init_sched() do { } while (0) -extern void synchronize_sched(void); - #ifdef CONFIG_TINY_RCU -#define call_rcu call_rcu_sched - -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} - static inline void synchronize_rcu_expedited(void) { synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! 
*/ @@ -67,7 +43,6 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void synchronize_rcu(void); void rcu_barrier(void); void synchronize_rcu_expedited(void); @@ -83,25 +58,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) -{ -} - -static inline void rcu_exit_nohz(void) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ */ - #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) @@ -117,11 +73,6 @@ static inline int rcu_needs_cpu(int cpu) return 0; } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #else /* #ifdef CONFIG_TINY_RCU */ void rcu_preempt_note_context_switch(void); @@ -141,8 +92,6 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } -extern void rcu_check_callbacks(int cpu, int user); - /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 54a20c11f98d..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,59 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -92,8 +56,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); -extern void rcu_check_callbacks(int cpu, int user); - extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); @@ -101,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. 
*/ static inline int rcu_blocking_is_gp(void) { diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 561410f70d4a..87f60f06b18e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -938,15 +938,6 @@ static void rcu_preempt_process_callbacks(void) { } -/* - * In classic RCU, call_rcu() is just call_rcu_sched(). - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptable RCU does not exist, map to rcu-sched. -- cgit v1.2.3 From 65e6bf484c497f02d47a0faae69ee398cd59cfda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Aug 2010 21:43:09 -0700 Subject: rcu: add comment stating that list_empty() applies to RCU-protected lists Because list_empty() does not dereference any RCU-protected pointers, and further does not pass such pointers to the caller (so that the caller does not dereference them either), it is safe to use list_empty() on RCU-protected lists. There is no need for a list_empty_rcu(). This commit adds a comment stating this explicitly. Requested-by: Andrew Morton Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c10b1050dbe6..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,15 @@ #include #include +/* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly -- cgit v1.2.3 From b35de43b31040828f83046f40fd34ba33146409d Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:27 -0700 Subject: kfifo: implement missing __kfifo_skip_r() kfifo_skip() is currently broken because its internal helper function, __kfifo_skip_r(), is missing. Add it.
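A minimal usage sketch of the record-fifo path that this helper completes, in the spirit of samples/kfifo/record-example.c; the fifo name, record contents, and sizes are illustrative only:

#include <linux/kfifo.h>

static STRUCT_KFIFO_REC_1(128) test_fifo;	/* record fifo with 1-byte length headers */

static void kfifo_skip_example(void)
{
	static const unsigned char hello[] = "hello";
	static const unsigned char world[] = "world";
	unsigned char buf[16];
	unsigned int len;

	INIT_KFIFO(test_fifo);
	kfifo_in(&test_fifo, hello, 5);
	kfifo_in(&test_fifo, world, 5);
	kfifo_skip(&test_fifo);		/* drops the "hello" record; expands to __kfifo_skip_r() for record fifos */
	len = kfifo_out(&test_fifo, buf, sizeof(buf));	/* copies one record, "world", so len == 5 */
}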
Signed-off-by: Andrea Righi Cc: Greg KH Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 ++ kernel/kfifo.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 311f8753d713..4aa95f203f3e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -836,6 +836,8 @@ extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize); extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize); +extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize); + extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize); diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4502604ecadf..6b5580c57644 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, } EXPORT_SYMBOL(__kfifo_out_r); +void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int n; + + n = __kfifo_peek_n(fifo, recsize); + fifo->out += n + recsize; +} +EXPORT_SYMBOL(__kfifo_skip_r); + int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { -- cgit v1.2.3 From f335397d177c906256ee1bba28e8c49e8ec63817 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: Input: sysrq - drop tty argument from handle_sysrq() Sysrq operations do not accept a tty argument anymore, so there is no need to pass it to us. [Stephen Rothwell : fix build breakage in drm code caused by sysrq using bool but not including linux/types.h] [Sachin Sant : fix build breakage in s390 keyboard driver] Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/ia64/hp/sim/simserial.c | 2 +- arch/um/drivers/mconsole_kern.c | 2 +- drivers/char/hangcheck-timer.c | 2 +- drivers/char/hvc_console.c | 2 +- drivers/char/hvsi.c | 2 +- drivers/char/sysrq.c | 11 +++++------ drivers/s390/char/ctrlchar.c | 4 +--- drivers/s390/char/keyboard.c | 2 +- drivers/serial/sn_console.c | 2 +- drivers/usb/serial/generic.c | 2 +- drivers/xen/manage.c | 2 +- include/linux/serial_core.h | 2 +- include/linux/sysrq.h | 12 +++++------- kernel/debug/kdb/kdb_main.c | 2 +- 14 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 2bef5261d96d..1e8d71ad93ef 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -149,7 +149,7 @@ static void receive_chars(struct tty_struct *tty) ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); while (!ch); - handle_sysrq(ch, NULL); + handle_sysrq(ch); } #endif seen_esc = 0; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index de317d0c3294..ebc680717e59 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -690,7 +690,7 @@ static void with_console(struct mc_request *req, void (*proc)(void *), static void sysrq_proc(void *arg) { char *op = arg; - handle_sysrq(*op, NULL); + handle_sysrq(*op); } void mconsole_sysrq(struct mc_request *req) diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index e0249722d25f..f953c96efc86 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -159,7 +159,7 @@ static void hangcheck_fire(unsigned long data) if (hangcheck_dump_tasks) { printk(KERN_CRIT "Hangcheck:
Task state:\n"); #ifdef CONFIG_MAGIC_SYSRQ - handle_sysrq('t', NULL); + handle_sysrq('t'); #endif /* CONFIG_MAGIC_SYSRQ */ } if (hangcheck_reboot) { diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index fa27d1676ee5..3afd62e856eb 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -651,7 +651,7 @@ int hvc_poll(struct hvc_struct *hp) if (sysrq_pressed) continue; } else if (sysrq_pressed) { - handle_sysrq(buf[i], tty); + handle_sysrq(buf[i]); sysrq_pressed = 0; continue; } diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index 1f4b6de65a2d..a2bc885ce60a 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -403,7 +403,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, const char *buf, int len) hp->sysrq = 1; continue; } else if (hp->sysrq) { - handle_sysrq(c, hp->tty); + handle_sysrq(c); hp->sysrq = 0; continue; } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index a892a3c249dd..ef31bb81e843 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -493,7 +492,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p) sysrq_key_table[i] = op_p; } -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +void __handle_sysrq(int key, bool check_mask) { struct sysrq_key_op *op_p; int orig_log_level; @@ -545,10 +544,10 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) spin_unlock_irqrestore(&sysrq_key_table_lock, flags); } -void handle_sysrq(int key, struct tty_struct *tty) +void handle_sysrq(int key) { if (sysrq_on()) - __handle_sysrq(key, tty, 1); + __handle_sysrq(key, true); } EXPORT_SYMBOL(handle_sysrq); @@ -597,7 +596,7 @@ static bool sysrq_filter(struct input_handle *handle, unsigned int type, default: if (sysrq_down && value && value != 2) - __handle_sysrq(sysrq_xlate[code], NULL, 1); + __handle_sysrq(sysrq_xlate[code], true); break; } @@ -765,7 +764,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, if (get_user(c, buf)) return -EFAULT; - __handle_sysrq(c, NULL, 0); + __handle_sysrq(c, false); } return count; diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c index c6cbcb3f925e..0e9a309b9669 100644 --- a/drivers/s390/char/ctrlchar.c +++ b/drivers/s390/char/ctrlchar.c @@ -16,12 +16,11 @@ #ifdef CONFIG_MAGIC_SYSRQ static int ctrlchar_sysrq_key; -static struct tty_struct *sysrq_tty; static void ctrlchar_handle_sysrq(struct work_struct *work) { - handle_sysrq(ctrlchar_sysrq_key, sysrq_tty); + handle_sysrq(ctrlchar_sysrq_key); } static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq); @@ -54,7 +53,6 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty) /* racy */ if (len == 3 && buf[1] == '-') { ctrlchar_sysrq_key = buf[2]; - sysrq_tty = tty; schedule_work(&ctrlchar_work); return CTRLCHAR_SYSRQ; } diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 18d9a497863b..8cd58e412b5e 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -305,7 +305,7 @@ kbd_keycode(struct kbd_data *kbd, unsigned int keycode) if (kbd->sysrq) { if (kbd->sysrq == K(KT_LATIN, '-')) { kbd->sysrq = 0; - handle_sysrq(value, kbd->tty); + handle_sysrq(value); return; } if (value == '-') { diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index 7e5e5efea4e2..cff9a306660f 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -492,7 +492,7 @@ 
sn_receive_chars(struct sn_cons_port *port, unsigned long flags) sysrq_requested = 0; if (ch && time_before(jiffies, sysrq_timeout)) { spin_unlock_irqrestore(&port->sc_port.lock, flags); - handle_sysrq(ch, NULL); + handle_sysrq(ch); spin_lock_irqsave(&port->sc_port.lock, flags); /* ignore actual sysrq command char */ continue; diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ca92f67747cc..1e846cc3c7a4 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -453,7 +453,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 1799bd890315..ef9c7db52077 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -237,7 +237,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, goto again; if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); + handle_sysrq(sysrq_key); } static struct xenbus_watch sysrq_watch = { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3c2ad99fed34..64458a9a8938 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -465,7 +465,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) #ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, port->state->port.tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4ee650315119..387fa7d05c98 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -15,9 +15,7 @@ #define _LINUX_SYSRQ_H #include - -struct pt_regs; -struct tty_struct; +#include /* Possible values of bitmask for enabling sysrq functions */ /* 0x0001 is reserved for enable everything */ @@ -44,8 +42,8 @@ struct sysrq_key_op { * are available -- else NULL's). */ -void handle_sysrq(int key, struct tty_struct *tty); -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +void handle_sysrq(int key); +void __handle_sysrq(int key, bool check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); @@ -54,11 +52,11 @@ int sysrq_toggle_support(int enable_mask); #else -static inline void handle_sysrq(int key, struct tty_struct *tty) +static inline void handle_sysrq(int key) { } -static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +static inline void __handle_sysrq(int key, bool check_mask) { } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 28b844118bbd..caf057a3de0e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1929,7 +1929,7 @@ static int kdb_sr(int argc, const char **argv) if (argc != 1) return KDB_ARGCOUNT; kdb_trap_printk++; - __handle_sysrq(*argv[1], NULL, 0); + __handle_sysrq(*argv[1], false); kdb_trap_printk--; return 0; -- cgit v1.2.3 From 6ee9f4b4affe751d313d2538999aeec134d413a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: USB: drop tty argument from usb_serial_handle_sysrq_char() Since handle_sysrq() does not take a tty argument anymore, we can drop it from usb_serial_handle_sysrq_char() as well.
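For illustration, this is the calling convention drivers are left with after this series; the helper below is a hypothetical sketch (the function name is invented), with the two API calls mirroring the generic driver:

	#include <linux/tty_flip.h>
	#include <linux/usb/serial.h>

	/* push a received byte to the tty unless sysrq consumed it */
	static void example_process_char(struct usb_serial_port *port,
					 struct tty_struct *tty, unsigned int ch)
	{
		if (!usb_serial_handle_sysrq_char(port, ch))
			tty_insert_flip_char(tty, ch, TTY_NORMAL);
	}

The tty is still needed to queue the character for regular input; only the sysrq path stops caring about it.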
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/generic.c | 8 +++----- drivers/usb/serial/pl2303.c | 2 +- drivers/usb/serial/ssu100.c | 2 +- include/linux/usb/serial.h | 3 +-- 5 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index eb12d9b096b4..5d47983b533c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1831,7 +1831,7 @@ static int ftdi_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else { diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 1e846cc3c7a4..1abe34c38c08 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -343,7 +343,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb) tty_insert_flip_string(tty, ch, urb->actual_length); else { for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, TTY_NORMAL); } } @@ -448,8 +448,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); #ifdef CONFIG_MAGIC_SYSRQ -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { @@ -462,8 +461,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, return 0; } #else -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { return 0; } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 6b6001822279..34ad7b3d5948 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -788,7 +788,7 @@ static void pl2303_process_read_urb(struct urb *urb) if (port->port.console && port->sysrq) { for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(tty, port, data[i])) + if (!usb_serial_handle_sysrq_char(port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); } else { tty_insert_flip_string_fixed_flag(tty, data, tty_flag, diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index 6e82d4f54bc8..819de4740388 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -596,7 +596,7 @@ static int ssu100_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 84a4c44c208b..55675b1efb28 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -342,8 +342,7 @@ extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port, extern void usb_serial_generic_process_read_urb(struct urb *urb); extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, 
void *dest, size_t size); -extern int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, +extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch); extern int usb_serial_handle_break(struct usb_serial_port *port); -- cgit v1.2.3 From 8905aaafb4b5d9764c5b4b54c7d03eb41bb0a7e9 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 19 Aug 2010 09:52:28 -0700 Subject: Input: uinput - add devname alias to allow module on-demand load Recent modprobe and udev versions allow creating device nodes for modules which are not loaded. Only the first access will cause the in-kernel module loader to pull in the module. Systems which never access the device node will not needlessly load the module, and no longer need init scripts or other facilities to unconditionally load it. Signed-off-by: Kay Sievers Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 2 ++ include/linux/miscdevice.h | 1 + include/linux/uinput.h | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index bb53fd33cd1c..0d4266a533a5 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -811,6 +811,8 @@ static struct miscdevice uinput_misc = { .minor = UINPUT_MINOR, .name = UINPUT_NAME, }; +MODULE_ALIAS_MISCDEV(UINPUT_MINOR); +MODULE_ALIAS("devname:" UINPUT_NAME); static int __init uinput_init(void) { diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index bafffc737903..18fd13028ba1 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -33,6 +33,7 @@ #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 #define MPT2SAS_MINOR 221 +#define UINPUT_MINOR 223 #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 diff --git a/include/linux/uinput.h b/include/linux/uinput.h index 60c81da77f0f..05f7fed2b173 100644 --- a/include/linux/uinput.h +++ b/include/linux/uinput.h @@ -37,7 +37,6 @@ #define UINPUT_VERSION 3 #ifdef __KERNEL__ -#define UINPUT_MINOR 223 #define UINPUT_NAME "uinput" #define UINPUT_BUFFER_SIZE 16 #define UINPUT_NUM_REQUESTS 16 -- cgit v1.2.3 From 297c5eee372478fc32fec5fe8eed711eedb13f3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:24:55 -0700 Subject: mm: make the vma list be doubly linked It's a really simple list, and several of the users want to go backwards in it to find the previous vma. So rather than have to look up the previous entry with 'find_vma_prev()' or something similar, just make it doubly linked instead. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- kernel/fork.c | 7 +++++-- mm/mmap.c | 21 +++++++++++++++++---- mm/nommu.c | 7 +++++-- 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..ee7e258627f9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,7 +134,7 @@ struct vm_area_struct { within vm_mm. */ /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; + struct vm_area_struct *vm_next, *vm_prev; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h.
*/ diff --git a/kernel/fork.c b/kernel/fork.c index 856eac3ec52e..b7e9d60a675d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -300,7 +300,7 @@ out: #ifdef CONFIG_MMU static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { - struct vm_area_struct *mpnt, *tmp, **pprev; + struct vm_area_struct *mpnt, *tmp, *prev, **pprev; struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; @@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + prev = NULL; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { struct file *file; @@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; - tmp->vm_next = NULL; + tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; @@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ *pprev = tmp; pprev = &tmp->vm_next; + tmp->vm_prev = prev; + prev = tmp; __vma_link_rb(mm, tmp, rb_link, rb_parent); rb_link = &tmp->vm_rb.rb_right; diff --git a/mm/mmap.c b/mm/mmap.c index 31003338b978..331e51af38c9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -388,17 +388,23 @@ static inline void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + struct vm_area_struct *next; + + vma->vm_prev = prev; if (prev) { - vma->vm_next = prev->vm_next; + next = prev->vm_next; prev->vm_next = vma; } else { mm->mmap = vma; if (rb_parent) - vma->vm_next = rb_entry(rb_parent, + next = rb_entry(rb_parent, struct vm_area_struct, vm_rb); else - vma->vm_next = NULL; + next = NULL; } + vma->vm_next = next; + if (next) + next->vm_prev = vma; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, @@ -483,7 +489,11 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - prev->vm_next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next; + + prev->vm_next = next; + if (next) + next->vm_prev = prev; rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; @@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); + vma->vm_prev = NULL; do { rb_erase(&vma->vm_rb, &mm->mm_rb); mm->map_count--; @@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; + if (vma) + vma->vm_prev = prev; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? 
prev->vm_end : mm->mmap_base; diff --git a/mm/nommu.c b/mm/nommu.c index efa9a380335e..88ff091eb07a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags) */ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma, **pp; + struct vm_area_struct *pvma, **pp, *next; struct address_space *mapping; struct rb_node **p, *parent; @@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) break; } - vma->vm_next = *pp; + next = *pp; *pp = vma; + vma->vm_next = next; + if (next) + next->vm_prev = vma; } /* -- cgit v1.2.3 From e36c886a0f9d624377977fa6cae309cfd7f362fa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 21 Aug 2010 13:07:26 -0700 Subject: workqueue: Add basic tracepoints to track workqueue execution With the introduction of the new unified work queue thread pools, we lost one feature: It's no longer possible to know which worker is causing the CPU to wake out of idle. The result is that PowerTOP now reports a lot of "kworker/a:b" instead of more readable results. This patch adds a pair of tracepoints to the new workqueue code, similar in style to the timer/hrtimer tracepoints. With this pair of tracepoints, the next PowerTOP can correctly report which work item caused the wakeup (and how long it took):

Interrupt (43) i915 time 3.51ms wakeups 141
Work ieee80211_iface_work time 0.81ms wakeups 29
Work do_dbs_timer time 0.55ms wakeups 24
Process Xorg time 21.36ms wakeups 4
Timer sched_rt_period_timer time 0.01ms wakeups 1

Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/trace/events/workqueue.h | 62 ++++++++++++++++++++++++++++++++++++++++ kernel/workqueue.c | 9 ++++++ 2 files changed, 71 insertions(+) create mode 100644 include/trace/events/workqueue.h (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..49682d7e9d60 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,62 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include <linux/tracepoint.h> +#include <linux/workqueue.h> + +/** + * workqueue_execute_start - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(workqueue_execute_start, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) +); + +/** + * workqueue_execute_end - called immediately after the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution.
+ */ +TRACE_EVENT(workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + ), + + TP_fast_assign( + __entry->work = work; + ), + + TP_printk("work struct %p", __entry->work) +); + + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2994a0e3a61c..8bd600c020e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -35,6 +35,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include <trace/events/workqueue.h> + #include "workqueue_sched.h" enum { @@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) work_clear_pending(work); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); + trace_workqueue_execute_start(work); f(work); + /* + * While we must be careful to not use "work" after this, the trace + * point will only record its address. + */ + trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); lock_map_release(&cwq->wq->lockdep_map); -- cgit v1.2.3 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce the "pptp" module, which implements the Point-to-Point Tunneling Protocol using the pppox framework. NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required so that pptp and ip_gre may coexist). NET: ip_gre: update to use the "gre" module. This patch introduces PPTP support to the Linux kernel, which dramatically speeds up PPTP VPN connections and decreases CPU usage in comparison with the existing user-space implementation (poptop/pptpclient). There is the accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module; it contains a plugin for pppd to use PPTP in client mode and a modified pptpd (poptop) to build a high-performance PPTP NAS. There were many changes from the initially submitted patch; the most important are: 1. using rcu instead of read-write locks 2. using a static bitmap instead of a dynamically allocated one 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- MAINTAINERS | 14 + drivers/net/Kconfig | 11 + drivers/net/Makefile | 1 + drivers/net/pptp.c | 726 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 52 ++-- include/net/gre.h | 18 ++ net/ipv4/Kconfig | 7 + net/ipv4/Makefile | 1 + net/ipv4/gre.c | 151 ++++++++++ net/ipv4/ip_gre.c | 14 +- 10 files changed, 971 insertions(+), 24 deletions(-) create mode 100644 drivers/net/pptp.c create mode 100644 include/net/gre.h create mode 100644 net/ipv4/gre.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70e..43c9efcd8e10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6528,6 +6528,20 @@ M: "Maciej W.
Rozycki" S: Maintained F: drivers/serial/zs.* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5a6895320b48..9b2a72089a68 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3192,6 +3192,17 @@ config PPPOE which contains instruction on how to use this driver (under the heading "Kernel mode PPPoE"). +config PPTP + tristate "PPP over IPv4 (PPTP) (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPP && NET_IPGRE_DEMUX + help + Support for PPP over IPv4.(Point-to-Point Tunneling Protocol) + + This driver requires pppd plugin to work in client mode or + modified pptpd (poptop) to work in server mode. + See http://accel-pptp.sourceforge.net/ for information how to + utilize this module. + config PPPOATM tristate "PPP over ATM" depends on ATM && PPP diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..0b371083d481 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -162,6 +162,7 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o +obj-$(CONFIG_PPTP) += pppox.o pptp.o obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c new file mode 100644 index 000000000000..761f0eced724 --- /dev/null +++ b/drivers/net/pptp.c @@ -0,0 +1,726 @@ +/* + * Point-to-Point Tunneling Protocol for Linux + * + * Authors: Dmitry Kozlov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define PPTP_DRIVER_VERSION "0.8.5" + +#define MAX_CALLID 65535 + +static DECLARE_BITMAP(callid_bitmap, MAX_CALLID + 1); +static struct pppox_sock **callid_sock; + +static DEFINE_SPINLOCK(chan_lock); + +static struct proto pptp_sk_proto __read_mostly; +static struct ppp_channel_ops pptp_chan_ops; +static const struct proto_ops pptp_ops; + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + u16 protocol; + u16 payload_len; + u16 call_id; + u32 seq; + u32 ack; +} __packed; + +static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + + rcu_read_lock(); + sock = rcu_dereference(callid_sock[call_id]); + if (sock) { + opt = &sock->proto.pptp; + if (opt->dst_addr.sin_addr.s_addr != s_addr) + sock = NULL; + else + sock_hold(sk_pppox(sock)); + } + rcu_read_unlock(); + + return sock; +} + +static int lookup_chan_dst(u16 call_id, __be32 d_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + int i; + + rcu_read_lock(); + for (i = find_next_bit(callid_bitmap, MAX_CALLID, 1); i < MAX_CALLID; + i = find_next_bit(callid_bitmap, MAX_CALLID, i + 1)) { + sock = rcu_dereference(callid_sock[i]); + if (!sock) + continue; + opt = &sock->proto.pptp; + if (opt->dst_addr.call_id == call_id && + opt->dst_addr.sin_addr.s_addr == d_addr) + break; + } + rcu_read_unlock(); + + return i < MAX_CALLID; +} + +static int add_chan(struct pppox_sock *sock) +{ + static int call_id; + + spin_lock(&chan_lock); + if (!sock->proto.pptp.src_addr.call_id) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); + if (call_id == MAX_CALLID) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); + if (call_id == MAX_CALLID) + goto out_err; + } + sock->proto.pptp.src_addr.call_id = call_id; + } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + goto out_err; + + set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + spin_unlock(&chan_lock); + + return 0; + +out_err: + spin_unlock(&chan_lock); + return -1; +} + +static void del_chan(struct pppox_sock *sock) +{ + spin_lock(&chan_lock); + clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); + spin_unlock(&chan_lock); + synchronize_rcu(); +} + +static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + 
struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct pptp_gre_header *hdr; + unsigned int header_len = sizeof(*hdr); + int err = 0; + int islcp; + int len; + unsigned char *data; + __u32 seq_recv; + + + struct rtable *rt; + struct net_device *tdev; + struct iphdr *iph; + int max_headroom; + + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + + { + struct flowi fl = { .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_TOS(0) } }, + .proto = IPPROTO_GRE }; + err = ip_route_output_key(&init_net, &rt, &fl); + if (err) + goto tx_error; + } + tdev = rt->dst.dev; + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 2; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + goto tx_error; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + + data = skb->data; + islcp = ((data[0] << 8) + data[1]) == PPP_LCP && 1 <= data[2] && data[2] <= 7; + + /* compress protocol field */ + if ((opt->ppp_flags & SC_COMP_PROT) && data[0] == 0 && !islcp) + skb_pull(skb, 1); + + /* Put in the address/control bytes if necessary */ + if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) { + data = skb_push(skb, 2); + data[0] = PPP_ALLSTATIONS; + data[1] = PPP_UI; + } + + len = skb->len; + + seq_recv = opt->seq_recv; + + if (opt->ack_sent == seq_recv) + header_len -= sizeof(hdr->ack); + + /* Push down and install GRE header */ + skb_push(skb, header_len); + hdr = (struct pptp_gre_header *)(skb->data); + + hdr->flags = PPTP_GRE_FLAG_K; + hdr->ver = PPTP_GRE_VER; + hdr->protocol = htons(PPTP_GRE_PROTO); + hdr->call_id = htons(opt->dst_addr.call_id); + + hdr->flags |= PPTP_GRE_FLAG_S; + hdr->seq = htonl(++opt->seq_sent); + if (opt->ack_sent != seq_recv) { + /* send ack with this message */ + hdr->ver |= PPTP_GRE_FLAG_A; + hdr->ack = htonl(seq_recv); + opt->ack_sent = seq_recv; + } + hdr->payload_len = htons(len); + + /* Push down and install the IP header. 
*/ + + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + if (ip_dont_fragment(sk, &rt->dst)) + iph->frag_off = htons(IP_DF); + else + iph->frag_off = 0; + iph->protocol = IPPROTO_GRE; + iph->tos = 0; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->tot_len = htons(skb->len); + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + + nf_reset(skb); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, &rt->dst, NULL); + ip_send_check(iph); + + ip_local_out(skb); + +tx_error: + return 1; +} + +static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) +{ + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int headersize, payload_len, seq; + __u8 *payload; + struct pptp_gre_header *header; + + if (!(sk->sk_state & PPPOX_CONNECTED)) { + if (sock_queue_rcv_skb(sk, skb)) + goto drop; + return NET_RX_SUCCESS; + } + + header = (struct pptp_gre_header *)(skb->data); + + /* test if acknowledgement present */ + if (PPTP_GRE_IS_A(header->ver)) { + __u32 ack = (PPTP_GRE_IS_S(header->flags)) ? + header->ack : header->seq; /* ack in different place if S = 0 */ + + ack = ntohl(ack); + + if (ack > opt->ack_recv) + opt->ack_recv = ack; + /* also handle sequence number wrap-around */ + if (WRAPPED(ack, opt->ack_recv)) + opt->ack_recv = ack; + } + + /* test if payload present */ + if (!PPTP_GRE_IS_S(header->flags)) + goto drop; + + headersize = sizeof(*header); + payload_len = ntohs(header->payload_len); + seq = ntohl(header->seq); + + /* no ack present? 
*/ + if (!PPTP_GRE_IS_A(header->ver)) + headersize -= sizeof(header->ack); + /* check for incomplete packet (length smaller than expected) */ + if (skb->len - headersize < payload_len) + goto drop; + + payload = skb->data + headersize; + /* check for expected sequence number */ + if (seq < opt->seq_recv + 1 || WRAPPED(opt->seq_recv, seq)) { + if ((payload[0] == PPP_ALLSTATIONS) && (payload[1] == PPP_UI) && + (PPP_PROTOCOL(payload) == PPP_LCP) && + ((payload[4] == PPP_LCP_ECHOREQ) || (payload[4] == PPP_LCP_ECHOREP))) + goto allow_packet; + } else { + opt->seq_recv = seq; +allow_packet: + skb_pull(skb, headersize); + + if (payload[0] == PPP_ALLSTATIONS && payload[1] == PPP_UI) { + /* chop off address/control */ + if (skb->len < 3) + goto drop; + skb_pull(skb, 2); + } + + if ((*skb->data) & 1) { + /* protocol is compressed */ + skb_push(skb, 1)[0] = 0; + } + + skb->ip_summed = CHECKSUM_NONE; + skb_set_network_header(skb, skb->head-skb->data); + ppp_input(&po->chan, skb); + + return NET_RX_SUCCESS; + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_rcv(struct sk_buff *skb) +{ + struct pppox_sock *po; + struct pptp_gre_header *header; + struct iphdr *iph; + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + iph = ip_hdr(skb); + + header = (struct pptp_gre_header *)skb->data; + + if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ + PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ + PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ + !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ + (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + /* if invalid, discard this packet */ + goto drop; + + po = lookup_chan(htons(header->call_id), iph->saddr); + if (po) { + skb_dst_drop(skb); + nf_reset(skb); + return sk_receive_skb(sk_pppox(po), skb, 0); + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int error = 0; + + lock_sock(sk); + + opt->src_addr = sp->sa_addr.pptp; + if (add_chan(po)) { + release_sock(sk); + error = -EBUSY; + } + + release_sock(sk); + return error; +} + +static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct rtable *rt; + int error = 0; + + if (sp->sa_protocol != PX_PROTO_PPTP) + return -EINVAL; + + if (lookup_chan_dst(sp->sa_addr.pptp.call_id, sp->sa_addr.pptp.sin_addr.s_addr)) + return -EALREADY; + + lock_sock(sk); + /* Check for already bound sockets */ + if (sk->sk_state & PPPOX_CONNECTED) { + error = -EBUSY; + goto end; + } + + /* Check for already disconnected sockets, on attempts to disconnect */ + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto end; + } + + if (!opt->src_addr.sin_addr.s_addr || !sp->sa_addr.pptp.sin_addr.s_addr) { + error = -EINVAL; + goto end; + } + + po->chan.private = sk; + po->chan.ops = &pptp_chan_ops; + + { + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = IPPROTO_GRE 
}; + security_sk_classify_flow(sk, &fl); + if (ip_route_output_key(&init_net, &rt, &fl)) { + error = -EHOSTUNREACH; + goto end; + } + sk_setup_caps(sk, &rt->dst); + } + po->chan.mtu = dst_mtu(&rt->dst); + if (!po->chan.mtu) + po->chan.mtu = PPP_MTU; + ip_rt_put(rt); + po->chan.mtu -= PPTP_HEADER_OVERHEAD; + + po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); + error = ppp_register_channel(&po->chan); + if (error) { + pr_err("PPTP: failed to register PPP channel (%d)\n", error); + goto end; + } + + opt->dst_addr = sp->sa_addr.pptp; + sk->sk_state = PPPOX_CONNECTED; + + end: + release_sock(sk); + return error; +} + +static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppox); + struct sockaddr_pppox sp; + + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_PPTP; + sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + return 0; +} + +static int pptp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po; + struct pptp_opt *opt; + int error = 0; + + if (!sk) + return 0; + + lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + po = pppox_sk(sk); + opt = &po->proto.pptp; + del_chan(po); + + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return error; +} + +static void pptp_sock_destruct(struct sock *sk) +{ + if (!(sk->sk_state & PPPOX_DEAD)) { + del_chan(pppox_sk(sk)); + pppox_unbind_sock(sk); + } + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pptp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + struct pppox_sock *po; + struct pptp_opt *opt; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pptp_ops; + + sk->sk_backlog_rcv = pptp_rcv_core; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_family = PF_PPPOX; + sk->sk_protocol = PX_PROTO_PPTP; + sk->sk_destruct = pptp_sock_destruct; + + po = pppox_sk(sk); + opt = &po->proto.pptp; + + opt->seq_sent = 0; opt->seq_recv = 0; + opt->ack_recv = 0; opt->ack_sent = 0; + + error = 0; +out: + return error; +} + +static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + void __user *argp = (void __user *)arg; + int __user *p = argp; + int err, val; + + err = -EFAULT; + switch (cmd) { + case PPPIOCGFLAGS: + val = opt->ppp_flags; + if (put_user(val, p)) + break; + err = 0; + break; + case PPPIOCSFLAGS: + if (get_user(val, p)) + break; + opt->ppp_flags = val & ~SC_RCV_BITS; + err = 0; + break; + default: + err = -ENOTTY; + } + + return err; +} + +static struct ppp_channel_ops pptp_chan_ops = { + .start_xmit = pptp_xmit, + .ioctl = pptp_ppp_ioctl, +}; + +static struct proto pptp_sk_proto __read_mostly = { + .name = "PPTP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static const struct proto_ops pptp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pptp_release, + .bind = pptp_bind, + .connect = pptp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pptp_getname, + .poll = sock_no_poll, + .listen = sock_no_listen, + 
.shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppox_pptp_proto = { + .create = pptp_create, + .owner = THIS_MODULE, +}; + +static struct gre_protocol gre_pptp_protocol = { + .handler = pptp_rcv, +}; + +static int __init pptp_init_module(void) +{ + int err = 0; + pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n"); + + callid_sock = __vmalloc((MAX_CALLID + 1) * sizeof(void *), + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + if (!callid_sock) { + pr_err("PPTP: can't allocate memory\n"); + return -ENOMEM; + } + + err = gre_add_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + if (err) { + pr_err("PPTP: can't add gre protocol\n"); + goto out_mem_free; + } + + err = proto_register(&pptp_sk_proto, 0); + if (err) { + pr_err("PPTP: can't register sk_proto\n"); + goto out_gre_del_protocol; + } + + err = register_pppox_proto(PX_PROTO_PPTP, &pppox_pptp_proto); + if (err) { + pr_err("PPTP: can't register pppox_proto\n"); + goto out_unregister_sk_proto; + } + + return 0; + +out_unregister_sk_proto: + proto_unregister(&pptp_sk_proto); +out_gre_del_protocol: + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); +out_mem_free: + vfree(callid_sock); + + return err; +} + +static void __exit pptp_exit_module(void) +{ + unregister_pppox_proto(PX_PROTO_PPTP); + proto_unregister(&pptp_sk_proto); + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + vfree(callid_sock); +} + +module_init(pptp_init_module); +module_exit(pptp_exit_module); + +MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..1525b2156b2a 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + u16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __packed; /* The use of the above union isn't viable because the size of this @@ -101,7 +111,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __packed; /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -150,15 +160,23 @@ struct pppoe_opt { relayed to
(PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 000000000000..82665474bcb7 --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d191249..7458bdae7e9f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -215,8 +215,15 @@ config NET_IPIP be inserted in and removed from the running kernel whenever you want). Most people won't need this and can say N. +config NET_IPGRE_DEMUX + tristate "IP: GRE demultiplexer" + help + This is a helper module to demultiplex GRE packets on the GRE version field. + It is required by the ip_gre and pptp modules. + config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on NET_IPGRE_DEMUX help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 80ff87ce43aa..4978d22f9a75 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c new file mode 100644 index 000000000000..b546736da2e1 --- /dev/null +++ b/net/ipv4/gre.c @@ -0,0 +1,151 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; +static DEFINE_SPINLOCK(gre_proto_lock); + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version]) + goto err_out_unlock; + + rcu_assign_pointer(gre_proto[version], proto); + spin_unlock(&gre_proto_lock); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version] != proto) + goto err_out_unlock; + rcu_assign_pointer(gre_proto[version], NULL); + spin_unlock(&gre_proto_lock); + synchronize_rcu(); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + u8 ver; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->err_handler) + goto drop_unlock; + proto->err_handler(skb, info); + rcu_read_unlock(); + return; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("gre: can't add protocol\n"); + return -EAGAIN; + } + + return 0; +} + +static void __exit gre_exit(void) +{ + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. 
Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); + diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad50..85176895495a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6 #include @@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static const struct net_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, - .netns_ok = 1, +static const struct gre_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, }; static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) @@ -1663,7 +1663,7 @@ static int __init ipgre_init(void) if (err < 0) return err; - err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { printk(KERN_INFO "ipgre init: can't add protocol\n"); goto add_proto_failed; @@ -1683,7 +1683,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&ipgre_net_ops); goto out; @@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) + if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/irda/irlan_common.h | 1 - net/irda/irlan/irlan_eth.c | 32 +++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16c..0af8b8dfbc22 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 5bb8353105cc..8ee1ff6c742f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); static void irlan_eth_set_multicast_list( struct net_device *dev); -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_get_stats = irlan_eth_get_stats, .ndo_set_multicast_list = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, * tried :-) DB */ /* irttp_data_request already free the packet */ - self->stats.tx_dropped++; + dev->stats.tx_dropped++; } else { - self->stats.tx_packets++; - self->stats.tx_bytes += len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; } return NETDEV_TX_OK; @@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) { struct irlan_cb *self = instance; + struct net_device *dev = self->dev; if (skb == NULL) { - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; return 0; } if (skb->len < ETH_HLEN) { IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", __func__, skb->len); - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; dev_kfree_skb(skb); return 0; } @@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) * might have been previously set by the low level IrDA network * device driver */ - skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ + skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */ - self->stats.rx_packets++; - self->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; netif_rx(skb); /* Eat it! */ @@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) else irlan_set_broadcast_filter(self, FALSE); } - -/* - * Function irlan_get_stats (dev) - * - * Get the current statistics for this device - * - */ -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - return &self->stats; -} -- cgit v1.2.3 From 739a91ef0625e0e4a40b835f4f891313c47915df Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 21 Aug 2010 06:23:15 +0000 Subject: net_sched: cls_flow: add key rxhash We can use rxhash to classify the traffic into flows. As rxhash may be supplied by the NIC or by RPS, it is cheaper. Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- include/linux/pkt_cls.h | 1 + net/sched/cls_flow.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7f6ba8658abe..defbde203d07 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -332,6 +332,7 @@ enum { FLOW_KEY_SKUID, FLOW_KEY_SKGID, FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, __FLOW_KEY_MAX, }; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index cd709f1294df..5b271a18bc3a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -306,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) return tag & VLAN_VID_MASK; } +static u32 flow_get_rxhash(struct sk_buff *skb) +{ + return skb_get_rxhash(skb); +} + static u32 flow_key_get(struct sk_buff *skb, int key) { switch (key) { @@ -343,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key) return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); + case FLOW_KEY_RXHASH: + return flow_get_rxhash(skb); default: WARN_ON(1); return 0; -- cgit v1.2.3 From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: fanotify: flush outstanding perm requests on group destroy When an fanotify listener is closing, it may cause a deadlock between the listener and the original task doing an fs operation. If the original task is waiting for a permissions response it will be holding the srcu lock. The listener cannot clean up and exit until after that srcu lock is synchronized. Thus deadlock. The fix introduced here is to stop accepting new permissions events when a listener is shutting down and to grant permission for all outstanding events. Thus the original task will eventually release the srcu lock and the listener can complete shutdown.
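A minimal sketch of that shutdown ordering, with simplified locking and hypothetical names (send_event_and_wait and fsnotify_grp_srcu are illustrative only; the list-draining half mirrors the actual diff below):

/* Task doing the fs operation: sleeps for a verdict while holding
 * the SRCU read lock, so the listener cannot synchronize_srcu(). */
idx = srcu_read_lock(&fsnotify_grp_srcu);
send_event_and_wait(group, event);	/* blocks until event->response is set */
srcu_read_unlock(&fsnotify_grp_srcu, idx);

/* Listener release path: answer every queued event first, so the
 * waiter above wakes, drops the read lock, and the SRCU
 * synchronization in group teardown can finally complete. */
mutex_lock(&group->fanotify_data.access_mutex);
group->fanotify_data.bypass_perm = true;	/* refuse new permission events */
list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
	list_del_init(&re->list);
	re->event->response = FAN_ALLOW;	/* grant the outstanding request */
}
mutex_unlock(&group->fanotify_data.access_mutex);
wake_up(&group->fanotify_data.access_waitq);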
Reported-by: Andreas Gruenbacher Cc: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++ include/linux/fanotify.h | 7 ------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..b966b7230f47 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, re->fd = fd; mutex_lock(&group->fanotify_data.access_mutex); + + if (group->fanotify_data.bypass_perm) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); + event->response = FAN_ALLOW; + return 0; + } + list_add_tail(&re->list, &group->fanotify_data.access_list); mutex_unlock(&group->fanotify_data.access_mutex); @@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct fanotify_response_event *re, *lre; pr_debug("%s: file=%p group=%p\n", __func__, file, group); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + mutex_lock(&group->fanotify_data.access_mutex); + + group->fanotify_data.bypass_perm = true; + + list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { + pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, + re, re->event); + + list_del_init(&re->list); + re->event->response = FAN_ALLOW; + + kmem_cache_free(fanotify_response_event_cache, re); + } + mutex_unlock(&group->fanotify_data.access_mutex); + + wake_up(&group->fanotify_data.access_waitq); +#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_put_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index f0949a57ca9d..985435622ecd 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -95,11 +95,4 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) -#ifdef __KERNEL__ - -struct fanotify_wait { - struct fsnotify_event *event; - __s32 fd; -}; -#endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ed36fb57c426..e40190d16878 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -156,6 +156,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; + bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; } fanotify_data; -- cgit v1.2.3 From d8287fc864643beaf1623c92aceb1ab38eae0648 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 18:37:27 -0700 Subject: net: use __be16 instead of u16 for the userspace code Signed-off-by: Changli Gao Signed-off-by: David S. 
Miller --- include/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1525b2156b2a..770e8fa669d2 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -50,8 +50,8 @@ struct pppoe_addr { * PPTP addressing definition */ struct pptp_addr { - u16 call_id; - struct in_addr sin_addr; + __be16 call_id; + struct in_addr sin_addr; }; /************************************************************************ -- cgit v1.2.3 From 05532121da0728eaedac2a0a5c3cecad3a95d765 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 21:03:33 -0700 Subject: net: 802.1q: make vlan_hwaccel_do_receive() return void vlan_hwaccel_do_receive() always returns 0, so make it return void. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 ++--- net/8021q/vlan_core.c | 3 +-- net/core/dev.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3d870fda8c4f..a52320751bfc 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -119,7 +119,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern void vlan_hwaccel_do_receive(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -147,9 +147,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline void vlan_hwaccel_do_receive(struct sk_buff *skb) { - return 0; } static inline gro_result_t diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 01ddb0472f86..07eeb5b99dce 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -35,7 +35,7 @@ drop: } EXPORT_SYMBOL(__vlan_hwaccel_rx); -int vlan_hwaccel_do_receive(struct sk_buff *skb) +void vlan_hwaccel_do_receive(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct vlan_rx_stats *rx_stats; @@ -69,7 +69,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) break; } u64_stats_update_end(&rx_stats->syncp); - return 0; } struct net_device *vlan_dev_real_dev(const struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 7cd5237d9822..d569f88bcf80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2841,8 +2841,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); - if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) - return NET_RX_SUCCESS; + if (vlan_tx_tag_present(skb)) + vlan_hwaccel_do_receive(skb); /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) -- cgit v1.2.3 From fcb12fd2236f49aa8fdc1568ed4ebdfe4fddc6b5 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 16:41:59 +0000 Subject: net: rds: remove duplicate type definitions __be* are defined in linux/types.h now, and in fact, rds.h isn't even exported to user space. Signed-off-by: Changli Gao Signed-off-by: David S.
Miller --- include/linux/rds.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9ea..7f3971d9fc5c 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. */ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* -- cgit v1.2.3 From 09cd2b99c6cdd1e14e84c1febca2fb91e9f4e5ba Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 17:25:05 +0000 Subject: header: fix broken headers for user space __packed is only defined in kernel space, so we should use __attribute__((packed)) for the code shared between kernel and user space. Two __attribute() annotations are replaced with __attribute__() too. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 +- include/linux/if_fddi.h | 8 ++++---- include/linux/if_hippi.h | 8 ++++---- include/linux/if_pppox.h | 10 +++++----- include/linux/ipv6.h | 4 ++-- include/linux/nbd.h | 2 +- include/linux/ncp.h | 10 +++++----- include/linux/netfilter/xt_IDLETIMER.h | 2 +- include/linux/phonet.h | 4 ++-- include/linux/rfkill.h | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index c831467774d0..bed7a4682b90 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -119,7 +119,7 @@ struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index 9947c39e62f6..e6dc11e7f9a5 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -67,7 +67,7 @@ struct fddi_8022_1_hdr { __u8 dsap; /* destination service access point */ __u8 ssap; /* source service access point */ __u8 ctrl; /* control byte #1 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 Type 2 header */ struct fddi_8022_2_hdr { @@ -75,7 +75,7 @@ struct fddi_8022_2_hdr { __u8 ssap; /* source service access point */ __u8 ctrl_1; /* control byte #1 */ __u8 ctrl_2; /* control byte #2 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 SNAP header */ #define FDDI_K_OUI_LEN 3 @@ -85,7 +85,7 @@ struct fddi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); /* Define FDDI LLC frame header */ struct fddihdr { @@ -98,7 +98,7 @@ struct fddihdr { struct fddi_8022_2_hdr llc_8022_2; struct fddi_snap_hdr llc_snap; } hdr; -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index 5fe5f307c6f5..cdc049f1829a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -104,7 +104,7 @@ struct hippi_fp_hdr { __be32 fixed; #endif __be32 d2_size; -} __packed; +} __attribute__((packed)); struct hippi_le_hdr { #if defined (__BIG_ENDIAN_BITFIELD) @@ -129,7 +129,7 @@ struct hippi_le_hdr { __u8 daddr[HIPPI_ALEN]; __u16 locally_administered; __u8 saddr[HIPPI_ALEN]; -} __packed; +} __attribute__((packed)); #define HIPPI_OUI_LEN 3 /* @@ -142,12 
+142,12 @@ struct hippi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); struct hippi_hdr { struct hippi_fp_hdr fp; struct hippi_le_hdr le; struct hippi_snap_hdr snap; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_IF_HIPPI_H */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..27741e05446f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -59,7 +59,7 @@ struct sockaddr_pppox { union{ struct pppoe_addr pppoe; }sa_addr; -} __packed; +} __attribute__((packed)); /* The use of the above union isn't viable because the size of this * struct must stay fixed over time -- applications use sizeof(struct @@ -70,7 +70,7 @@ struct sockaddr_pppol2tp { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tp_addr pppol2tp; -} __packed; +} __attribute__((packed)); /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32 * bits. So we need a different sockaddr structure. @@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tpv3_addr pppol2tp; -} __packed; +} __attribute__((packed)); /********************************************************************* * @@ -101,7 +101,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __attribute__ ((packed)); /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -129,7 +129,7 @@ struct pppoe_hdr { __be16 sid; __be16 length; struct pppoe_tag tag[0]; -} __packed; +} __attribute__((packed)); /* Length of entire PPPoE + PPP header */ #define PPPOE_SES_HLEN 8 diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ab9e9e89e407..e62683ba88e6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -58,7 +58,7 @@ struct ipv6_opt_hdr { /* * TLV encoded option data follows. 
*/ -} __packed; /* required for some archs */ +} __attribute__((packed)); /* required for some archs */ #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr @@ -99,7 +99,7 @@ struct ipv6_destopt_hao { __u8 type; __u8 length; struct in6_addr addr; -} __packed; +} __attribute__((packed)); /* * IPv6 fixed header diff --git a/include/linux/nbd.h b/include/linux/nbd.h index bb58854a8061..d146ca10c0f5 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -88,7 +88,7 @@ struct nbd_request { char handle[8]; __be64 from; __be32 len; -} __packed; +} __attribute__((packed)); /* * This is the reply packet that nbd-server sends back to the client after diff --git a/include/linux/ncp.h b/include/linux/ncp.h index 3ace8370e61e..99f0adeeb3f3 100644 --- a/include/linux/ncp.h +++ b/include/linux/ncp.h @@ -27,7 +27,7 @@ struct ncp_request_header { __u8 conn_high; __u8 function; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_REPLY (0x3333) #define NCP_WATCHDOG (0x3E3E) @@ -42,7 +42,7 @@ struct ncp_reply_header { __u8 completion_code; __u8 connection_state; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_VOLNAME_LEN (16) #define NCP_NUMBER_OF_VOLUMES (256) @@ -158,7 +158,7 @@ struct nw_info_struct { #ifdef __KERNEL__ struct nw_nfs_info nfs; #endif -} __packed; +} __attribute__((packed)); /* modify mask - use with MODIFY_DOS_INFO structure */ #define DM_ATTRIBUTES (cpu_to_le32(0x02)) @@ -190,12 +190,12 @@ struct nw_modify_dos_info { __u16 inheritanceGrantMask; __u16 inheritanceRevokeMask; __u32 maximumSpace; -} __packed; +} __attribute__((packed)); struct nw_search_sequence { __u8 volNumber; __u32 dirBase; __u32 sequence; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_NCP_H */ diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h index 3e1aa1be942e..208ae9387331 100644 --- a/include/linux/netfilter/xt_IDLETIMER.h +++ b/include/linux/netfilter/xt_IDLETIMER.h @@ -39,7 +39,7 @@ struct idletimer_tg_info { char label[MAX_IDLETIMER_LABEL_SIZE]; /* for kernel module internal use only */ - struct idletimer_tg *timer __attribute((aligned(8))); + struct idletimer_tg *timer __attribute__((aligned(8))); }; #endif diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 24426c3d6b5a..76edadf046d3 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -56,7 +56,7 @@ struct phonethdr { __be16 pn_length; __u8 pn_robj; __u8 pn_sobj; -} __packed; +} __attribute__((packed)); /* Common Phonet payload header */ struct phonetmsg { @@ -98,7 +98,7 @@ struct sockaddr_pn { __u8 spn_dev; __u8 spn_resource; __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; -} __packed; +} __attribute__((packed)); /* Well known address */ #define PN_DEV_PC 0x10 diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4f82326eb294..08c32e4f261a 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -81,7 +81,7 @@ struct rfkill_event { __u8 type; __u8 op; __u8 soft, hard; -} __packed; +} __attribute__((packed)); /* * We are planning to be backward and forward compatible with changes -- cgit v1.2.3 From 21dc330157454046dd7c494961277d76e1c957fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Aug 2010 00:13:46 -0700 Subject: net: Rename skb_has_frags to skb_has_frag_list SKBs can be "fragmented" in two ways, via a page array (called skb_shinfo(skb)->frags[]) and via a list of SKBs (called skb_shinfo(skb)->frag_list). 
Since skb_has_frags() tests the latter, its name is confusing since it sounds more like it's testing the former. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 4 ++-- net/core/dev.c | 4 ++-- net/core/skbuff.c | 18 +++++++++--------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..ce2de8b64083 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2191,7 +2191,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); + (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f067c95cf18a..f900ffcd847e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1120,7 +1120,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1784,7 +1784,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) -static inline bool skb_has_frags(const struct sk_buff *skb) +static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index d569f88bcf80..859e30ff044a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1930,7 +1930,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { return skb_is_nonlinear(skb) && - ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)))); } @@ -3090,7 +3090,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) goto normal; - if (skb_is_gso(skb) || skb_has_frags(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; rcu_read_lock(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 99ef721f773d..e2535fb4985d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,7 +340,7 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); kfree(skb->head); @@ -759,7 +759,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(n)->nr_frags = i; } - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } @@ -822,7 +822,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if
(skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb); @@ -1099,7 +1099,7 @@ drop_pages: for (; i < nfrags; i++) put_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } @@ -1194,7 +1194,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ - if (!skb_has_frags(skb)) + if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. */ @@ -2323,7 +2323,7 @@ next_skb: st->frag_data = NULL; } - if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { + if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; @@ -2889,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; /* Easy case. Most of packets will go this way. */ - if (!skb_has_frags(skb)) { + if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more @@ -2924,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1) || + skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } @@ -2933,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1)) { + skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b7c41654dde5..f4dc879e258e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e807492f1777..6d2753c7ffdd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * LATER: this step can be merged to real generation of fragments, * we can switch to copy when see the first bad fragment. 
*/ - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { struct sk_buff *frag; int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee6..1838927a2243 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf5..089c598773c7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -413,7 +413,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b755..8aea3f3f18d7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -499,7 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; -- cgit v1.2.3 From c93a4dfb31f2c023da3ad1238c352452f2cc0e05 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:28 +0100 Subject: xen: pvhvm: allow user to request no emulated device unplug This allows the user to disable pvhvm and revert to emulated devices in case of a system misconfiguration (e.g. initramfs with only emulated drivers in it). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 1 + arch/x86/xen/platform-pci-unplug.c | 5 +++++ include/xen/platform_pci.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2c85c0692b01..8bbe83b9d0b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2631,6 +2631,7 @@ and is between 256 and 4096 characters. It is defined in the file all -- unplug all emulated devices (NICs and IDE disks) ignore -- continue loading the Xen platform PCI driver even if the version check failed + never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] Format: diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 554c002a1e1a..070dfa0654bd 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -72,6 +72,9 @@ void __init xen_unplug_emulated_devices(void) { int r; + /* user explicitly requested no unplug */ + if (xen_emul_unplug & XEN_UNPLUG_NEVER) + return; /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver.
@@ -127,6 +130,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; else if (!strncmp(p, "ignore", l)) xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "never", l)) + xen_emul_unplug |= XEN_UNPLUG_NEVER; else printk(KERN_WARNING "unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index ce9d671c636c..123b7752fa6a 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -21,6 +21,7 @@ #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 #define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 1dc7ce99b091a11cce0f34456c1ffcb928f17edd Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:29 +0100 Subject: xen: pvhvm: rename xen_emul_unplug=ignore to =unnecessary It is not immediately clear what this option causes to become ignored. The actual meaning is that it is not necessary to unplug the emulated devices to safely use the PV ones, even if the platform does not support the unplug protocol. (presumably the user will only add this option if they have ensured that their domain configuration is safe). I think xen_emul_unplug=unnecessary better captures this. Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 5 +++-- arch/x86/xen/platform-pci-unplug.c | 13 +++++++------ drivers/block/xen-blkfront.c | 2 +- include/xen/platform_pci.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8bbe83b9d0b2..f084af0cb8e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2629,8 +2629,9 @@ and is between 256 and 4096 characters. It is defined in the file aux-ide-disks -- unplug non-primary-master IDE devices nics -- unplug network devices all -- unplug all emulated devices (NICs and IDE disks) - ignore -- continue loading the Xen platform PCI driver even - if the version check failed + unnecessary -- unplugging emulated devices is + unnecessary even if the host did not respond to + the unplug protocol never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 070dfa0654bd..0f456386cce5 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -78,10 +78,11 @@ void __init xen_unplug_emulated_devices(void) /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. - * Also enable the Xen platform PCI driver if the version is really old - * and the user told us to ignore it. */ + * Also enable the Xen platform PCI driver if the host does + * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) + * but the user told us that unplugging is unnecessary.
*/ if (r && !(r == XEN_PLATFORM_ERR_MAGIC && - (xen_emul_unplug & XEN_UNPLUG_IGNORE))) + (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) return; /* Set the default value of xen_emul_unplug depending on whether or * not the Xen PV frontends and the Xen platform PCI driver have @@ -102,7 +103,7 @@ void __init xen_unplug_emulated_devices(void) } } /* Now unplug the emulated devices */ - if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) + if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); xen_platform_pci_unplug = xen_emul_unplug; } @@ -128,8 +129,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; else if (!strncmp(p, "nics", l)) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - else if (!strncmp(p, "ignore", l)) - xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "unnecessary", l)) + xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..ab735a605cf3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -834,7 +834,7 @@ static int blkfront_probe(struct xenbus_device *dev, char *type; int len; /* no unplug has been done: do not hook devices != xen vbds */ - if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { int major; if (!VDEV_IS_EXTENDED(vdevice)) diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 123b7752fa6a..590ccfd82645 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -20,7 +20,7 @@ #define XEN_UNPLUG_ALL_NICS 2 #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_UNNECESSARY 8 #define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 9c35e90c6fcf7f5baf27a63d9565e9f47633f299 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 12:01:35 +0100 Subject: xen: pvhvm: make it clearer that XEN_UNPLUG_* define bits in a bitfield by defining in terms of (1<<N). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- include/xen/platform_pci.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 590ccfd82645..a785a3b0c8c7 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -16,12 +16,15 @@ #define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */ #define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */ -#define XEN_UNPLUG_ALL_IDE_DISKS 1 -#define XEN_UNPLUG_ALL_NICS 2 -#define XEN_UNPLUG_AUX_IDE_DISKS 4 -#define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_UNNECESSARY 8 -#define XEN_UNPLUG_NEVER 16 +#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0) +#define XEN_UNPLUG_ALL_NICS (1<<1) +#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2) +#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\ + XEN_UNPLUG_ALL_NICS|\ + XEN_UNPLUG_AUX_IDE_DISKS) + +#define XEN_UNPLUG_UNNECESSARY (1<<16) +#define XEN_UNPLUG_NEVER (1<<17) static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Aug 2010 13:52:19 +0200 Subject: block: add function call to switch the IO scheduler from a driver Currently drivers must do an elevator_exit() + elevator_init() to
switch IO schedulers. There are a few problems with this: - Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96, elevator_init() requires a zeroed out q->elevator pointer. The two existing in-kernel users don't do that. - It will only work at initialization time, since using the above two-stage construct does not properly quiesce the queue. So add elevator_change() which takes care of this, and convert the elv_iosched_store() sysfs interface to use this helper as well. Reported-by: Peter Oberparleiter Reported-by: Kevin Vigor Signed-off-by: Jens Axboe --- block/elevator.c | 44 +++++++++++++++++++++++++++++++------------- include/linux/elevator.h | 1 + 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..205b09a5bd9e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1009,18 +1009,19 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old_elevator, *e; void *data; + int err; /* * Allocate new elevator */ e = elevator_alloc(q, new_e); if (!e) - return 0; + return -ENOMEM; data = elevator_init_queue(q, e); if (!data) { kobject_put(&e->kobj); - return -ENOMEM; } /* @@ -1043,7 +1044,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) __elv_unregister_queue(old_elevator); - if (elv_register_queue(q)) + err = elv_register_queue(q); + if (err) goto fail_register; /* @@ -1056,7 +1058,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); - return 1; + return 0; fail_register: /* @@ -1071,17 +1073,19 @@ fail_register: queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); - return 0; + return err; } -ssize_t elv_iosched_store(struct request_queue *q, const char *name, - size_t count) +/* + * Switch this queue to the given IO scheduler.
+ */ +int elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; if (!q->elevator) - return count; + return -ENXIO; strlcpy(elevator_name, name, sizeof(elevator_name)); e = elevator_get(strstrip(elevator_name)); @@ -1092,13 +1096,27 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { elevator_put(e); - return count; + return 0; } - if (!elevator_switch(q, e)) - printk(KERN_ERR "elevator: switch to %s failed\n", - elevator_name); - return count; + return elevator_switch(q, e); +} +EXPORT_SYMBOL(elevator_change); + +ssize_t elv_iosched_store(struct request_queue *q, const char *name, + size_t count) +{ + int ret; + + if (!q->elevator) + return count; + + ret = elevator_change(q, name); + if (!ret) + return count; + + printk(KERN_ERR "elevator: switch to %s failed\n", name); + return ret; } ssize_t elv_iosched_show(struct request_queue *q, char *name) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1e..926b50322a46 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.3 From 61c77326d1df079f202fa79403c3ccd8c5966a81 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 16 Aug 2010 09:16:55 +0800 Subject: x86, mm: Avoid unnecessary TLB flush On x86, the access and dirty bits are set automatically by the CPU when it accesses memory. By the time we reach the flush_tlb_fix_spurious_fault() code path below, we have already set the dirty bit in the pte and don't need a TLB flush. This might mean the TLB entry on some CPUs doesn't have the dirty bit set, but that doesn't matter: when those CPUs write to the page, the hardware checks the bit itself, with no software involvement. On the other hand, a TLB flush at this point is harmful. A test creates one thread per CPU; each thread writes to the same randomly chosen address within one vma, and we measure the total time. On a 4-socket system the original time is 1.96s, while with the patch it is 0.8s. On a 2-socket system there is a 20% time reduction as well. perf shows that much of the time is spent sending and handling IPIs for the TLB flush. Signed-off-by: Shaohua Li LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com> Acked-by: Suresh Siddha Cc: Andrea Arcangeli Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/pgtable.h | 2 ++ include/asm-generic/pgtable.h | 4 ++++ mm/memory.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a34c785c5a63..2d0a33bd2971 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep); } +#define flush_tlb_fix_spurious_fault(vma, address) + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..a40da6983961 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3147,7 +3147,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, * with threads. */ if (flags & FAULT_FLAG_WRITE) - flush_tlb_page(vma, address); + flush_tlb_fix_spurious_fault(vma, address); } unlock: pte_unmap_unlock(pte, ptl); -- cgit v1.2.3 From 8488a38f4d2f43bd55a3e0db4cd57a5bef3af6d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Aug 2010 15:01:02 +0100 Subject: kobject: Break the kobject namespace defs into their own header Break the kobject namespace defs into their own header to avoid a header file inclusion ordering problem between linux/sysfs.h and linux/kobject.h. This fixes the build breakage on older versions of gcc. Signed-off-by: David Howells Cc: Eric Biederman Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 35 +---------------------------- include/linux/kobject_ns.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 1 + 3 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 include/linux/kobject_ns.h (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index cf343a852534..7950a37a7146 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -136,42 +137,8 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; -/* - * Namespace types which are used to tag kobjects and sysfs entries. - * Network namespace will likely be the first. - */ -enum kobj_ns_type { - KOBJ_NS_TYPE_NONE = 0, - KOBJ_NS_TYPE_NET, - KOBJ_NS_TYPES -}; - struct sock; -/* - * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace - * @netlink_ns: return namespace to which a sock belongs (right?) - * @initial_ns: return the initial namespace (i.e. 
init_net_ns) - */ -struct kobj_ns_type_operations { - enum kobj_ns_type type; - const void *(*current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); -}; - -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); -int kobj_ns_type_registered(enum kobj_ns_type type); -const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); -const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); - -const void *kobj_ns_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); - - /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. * diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h new file mode 100644 index 000000000000..82cb5bf461fb --- /dev/null +++ b/include/linux/kobject_ns.h @@ -0,0 +1,56 @@ +/* Kernel object name space definitions + * + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * Copyright (c) 2006-2008 Greg Kroah-Hartman + * Copyright (c) 2006-2008 Novell Inc. + * + * Split from kobject.h by David Howells (dhowells@redhat.com) + * + * This file is released under the GPLv2. + * + * Please read Documentation/kobject.txt before using the kobject + * interface, ESPECIALLY the parts about reference counts and object + * destructors. + */ + +#ifndef _LINUX_KOBJECT_NS_H +#define _LINUX_KOBJECT_NS_H + +struct sock; +struct kobject; + +/* + * Namespace types which are used to tag kobjects and sysfs entries. + * Network namespace will likely be the first. + */ +enum kobj_ns_type { + KOBJ_NS_TYPE_NONE = 0, + KOBJ_NS_TYPE_NET, + KOBJ_NS_TYPES +}; + +/* + * Callbacks so sysfs can determine namespaces + * @current_ns: return calling task's namespace + * @netlink_ns: return namespace to which a sock belongs (right?) + * @initial_ns: return the initial namespace (i.e. init_net_ns) + */ +struct kobj_ns_type_operations { + enum kobj_ns_type type; + const void *(*current_ns)(void); + const void *(*netlink_ns)(struct sock *sk); + const void *(*initial_ns)(void); +}; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int kobj_ns_type_registered(enum kobj_ns_type type); +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); + +const void *kobj_ns_current(enum kobj_ns_type type); +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); +const void *kobj_ns_initial(enum kobj_ns_type type); +void kobj_ns_exit(enum kobj_ns_type type, const void *ns); + +#endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3c92121ba9af..96eb576d82fd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -16,6 +16,7 @@ #include #include #include +#include #include struct kobject; -- cgit v1.2.3 From 8b230ed8ec96c933047dd0625cf95f739e4939a6 Mon Sep 17 00:00:00 2001 From: Rasesh Mody Date: Mon, 23 Aug 2010 20:24:12 -0700 Subject: bna: Brocade 10Gb Ethernet device driver This is patch 1/6 which contains linux driver source for Brocade's BR1010/BR1020 10Gb CEE capable ethernet adapter. Signed-off-by: Debashis Dutt Signed-off-by: Rasesh Mody Signed-off-by: David S. 
Miller --- MAINTAINERS | 7 + drivers/net/Kconfig | 14 + drivers/net/Makefile | 1 + drivers/net/bna/Makefile | 11 + drivers/net/bna/bfa_cee.c | 407 ++++ drivers/net/bna/bfa_cee.h | 72 + drivers/net/bna/bfa_defs.h | 243 ++ drivers/net/bna/bfa_defs_cna.h | 223 ++ drivers/net/bna/bfa_defs_mfg_comm.h | 244 ++ drivers/net/bna/bfa_defs_status.h | 216 ++ drivers/net/bna/bfa_ioc.c | 1839 +++++++++++++++ drivers/net/bna/bfa_ioc.h | 343 +++ drivers/net/bna/bfa_ioc_ct.c | 391 ++++ drivers/net/bna/bfa_sm.h | 88 + drivers/net/bna/bfa_wc.h | 69 + drivers/net/bna/bfi.h | 392 ++++ drivers/net/bna/bfi_cna.h | 199 ++ drivers/net/bna/bfi_ctreg.h | 637 ++++++ drivers/net/bna/bfi_ll.h | 438 ++++ drivers/net/bna/bna.h | 654 ++++++ drivers/net/bna/bna_ctrl.c | 3626 ++++++++++++++++++++++++++++++ drivers/net/bna/bna_hw.h | 1491 +++++++++++++ drivers/net/bna/bna_txrx.c | 4209 +++++++++++++++++++++++++++++++++++ drivers/net/bna/bna_types.h | 1128 ++++++++++ drivers/net/bna/bnad.c | 3270 +++++++++++++++++++++++++++ drivers/net/bna/bnad.h | 334 +++ drivers/net/bna/bnad_ethtool.c | 1282 +++++++++++ drivers/net/bna/cna.h | 81 + drivers/net/bna/cna_fwimg.c | 64 + include/linux/pci_ids.h | 3 + 30 files changed, 21976 insertions(+) create mode 100644 drivers/net/bna/Makefile create mode 100644 drivers/net/bna/bfa_cee.c create mode 100644 drivers/net/bna/bfa_cee.h create mode 100644 drivers/net/bna/bfa_defs.h create mode 100644 drivers/net/bna/bfa_defs_cna.h create mode 100644 drivers/net/bna/bfa_defs_mfg_comm.h create mode 100644 drivers/net/bna/bfa_defs_status.h create mode 100644 drivers/net/bna/bfa_ioc.c create mode 100644 drivers/net/bna/bfa_ioc.h create mode 100644 drivers/net/bna/bfa_ioc_ct.c create mode 100644 drivers/net/bna/bfa_sm.h create mode 100644 drivers/net/bna/bfa_wc.h create mode 100644 drivers/net/bna/bfi.h create mode 100644 drivers/net/bna/bfi_cna.h create mode 100644 drivers/net/bna/bfi_ctreg.h create mode 100644 drivers/net/bna/bfi_ll.h create mode 100644 drivers/net/bna/bna.h create mode 100644 drivers/net/bna/bna_ctrl.c create mode 100644 drivers/net/bna/bna_hw.h create mode 100644 drivers/net/bna/bna_txrx.c create mode 100644 drivers/net/bna/bna_types.h create mode 100644 drivers/net/bna/bnad.c create mode 100644 drivers/net/bna/bnad.h create mode 100644 drivers/net/bna/bnad_ethtool.c create mode 100644 drivers/net/bna/cna.h create mode 100644 drivers/net/bna/cna_fwimg.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 43c9efcd8e10..93198c1980a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1387,6 +1387,13 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/bfa/ +BROCADE BNA 10 GIGABIT ETHERNET DRIVER +M: Rasesh Mody +M: Debashis Dutt +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/bna/ + BSG (block layer generic sg v4 driver) M: FUJITA Tomonori L: linux-scsi@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 85485287192a..53c4810b119e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2869,6 +2869,20 @@ config QLGE To compile this driver as a module, choose M here: the module will be called qlge. +config BNA + tristate "Brocade 1010/1020 10Gb Ethernet Driver support" + depends on PCI + ---help--- + This driver supports Brocade 1010/1020 10Gb CEE capable Ethernet + cards. + To compile this driver as a module, choose M here: the module + will be called bna. 
+ + For general information and support, go to the Brocade support + website at: + + + source "drivers/net/sfc/Kconfig" source "drivers/net/benet/Kconfig" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index f34b3baf7ee6..18a277709a2a 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_ENIC) += enic/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_BE2NET) += benet/ obj-$(CONFIG_VMXNET3) += vmxnet3/ +obj-$(CONFIG_BNA) += bna/ gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o \ diff --git a/drivers/net/bna/Makefile b/drivers/net/bna/Makefile new file mode 100644 index 000000000000..a5d604de7fea --- /dev/null +++ b/drivers/net/bna/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (c) 2005-2010 Brocade Communications Systems, Inc. +# All rights reserved. +# + +obj-$(CONFIG_BNA) += bna.o + +bna-objs := bnad.o bnad_ethtool.o bna_ctrl.o bna_txrx.o +bna-objs += bfa_ioc.o bfa_ioc_ct.o bfa_cee.o cna_fwimg.o + +EXTRA_CFLAGS := -Idrivers/net/bna diff --git a/drivers/net/bna/bfa_cee.c b/drivers/net/bna/bfa_cee.c new file mode 100644 index 000000000000..1545fc9720f8 --- /dev/null +++ b/drivers/net/bna/bfa_cee.c @@ -0,0 +1,407 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_defs_cna.h" +#include "cna.h" +#include "bfa_cee.h" +#include "bfi_cna.h" +#include "bfa_ioc.h" + +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_lpuid(__arg) bfa_ioc_portid(&(__arg)->ioc) + +static void bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg); +static void bfa_cee_format_cee_cfg(void *buffer); + +static void +bfa_cee_format_cee_cfg(void *buffer) +{ + struct bfa_cee_attr *cee_cfg = buffer; + bfa_cee_format_lldp_cfg(&cee_cfg->lldp_remote); +} + +static void +bfa_cee_stats_swap(struct bfa_cee_stats *stats) +{ + u32 *buffer = (u32 *)stats; + int i; + + for (i = 0; i < (sizeof(struct bfa_cee_stats) / sizeof(u32)); + i++) { + buffer[i] = ntohl(buffer[i]); + } +} + +static void +bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg) +{ + lldp_cfg->time_to_live = + ntohs(lldp_cfg->time_to_live); + lldp_cfg->enabled_system_cap = + ntohs(lldp_cfg->enabled_system_cap); +} + +/** + * bfa_cee_attr_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE attributes + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_attr_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_attr), BFA_DMA_ALIGN_SZ); +} +/** + * bfa_cee_stats_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE stats + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_stats_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_stats), BFA_DMA_ALIGN_SZ); +} + +/** + * bfa_cee_get_attr_isr() + * + * @brief CEE ISR for get-attributes responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void 
+bfa_cee_get_attr_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_attr_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->attr, cee->attr_dma.kva, + sizeof(struct bfa_cee_attr)); + bfa_cee_format_cee_cfg(cee->attr); + } + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, status); +} + +/** + * bfa_cee_get_stats_isr() + * + * @brief CEE ISR for get-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_get_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_stats_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->stats, cee->stats_dma.kva, + sizeof(struct bfa_cee_stats)); + bfa_cee_stats_swap(cee->stats); + } + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, status); +} + +/** + * bfa_cee_reset_stats_isr() + * + * @brief CEE ISR for reset-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_reset_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->reset_stats_status = status; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, status); +} +/** + * bfa_cee_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE module + * + * @param[in] void + * + * @return Size of DMA region + */ +u32 +bfa_cee_meminfo(void) +{ + return bfa_cee_attr_meminfo() + bfa_cee_stats_meminfo(); +} + +/** + * bfa_cee_mem_claim() + * + * @brief Initializes CEE DMA Memory + * + * @param[in] cee CEE module pointer + * dma_kva Kernel Virtual Address of CEE DMA Memory + * dma_pa Physical Address of CEE DMA Memory + * + * @return void + */ +void +bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, u64 dma_pa) +{ + cee->attr_dma.kva = dma_kva; + cee->attr_dma.pa = dma_pa; + cee->stats_dma.kva = dma_kva + bfa_cee_attr_meminfo(); + cee->stats_dma.pa = dma_pa + bfa_cee_attr_meminfo(); + cee->attr = (struct bfa_cee_attr *) dma_kva; + cee->stats = (struct bfa_cee_stats *) + (dma_kva + bfa_cee_attr_meminfo()); +} + +/** + * bfa_cee_get_attr() + * + * @brief + * Send the request to the f/w to fetch CEE attributes. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_get_attr(struct bfa_cee *cee, struct bfa_cee_attr *attr, + bfa_cee_get_attr_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_attr_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_attr_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_cfg_mb.msg; + cee->attr = attr; + cee->cbfn.get_attr_cbfn = cbfn; + cee->cbfn.get_attr_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_CFG_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->attr_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_cfg_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_get_stats() + * + * @brief + * Send the request to the f/w to fetch CEE statistics. + * + * @param[in] Pointer to the CEE module data structure.
+ * + * @return Status + */ + +enum bfa_status +bfa_cee_get_stats(struct bfa_cee *cee, struct bfa_cee_stats *stats, + bfa_cee_get_stats_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_stats_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_stats_mb.msg; + cee->stats = stats; + cee->cbfn.get_stats_cbfn = cbfn; + cee->cbfn.get_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_STATS_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->stats_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_stats_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_reset_stats() + * + * @brief Clears CEE Stats in the f/w. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_reset_stats(struct bfa_cee *cee, bfa_cee_reset_stats_cbfn_t cbfn, + void *cbarg) +{ + struct bfi_cee_reset_stats *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->reset_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->reset_stats_pending = true; + cmd = (struct bfi_cee_reset_stats *) cee->reset_stats_mb.msg; + cee->cbfn.reset_stats_cbfn = cbfn; + cee->cbfn.reset_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_RESET_STATS, + bfa_ioc_portid(cee->ioc)); + bfa_ioc_mbox_queue(cee->ioc, &cee->reset_stats_mb); + return BFA_STATUS_OK; +} + +/** + * bfa_cee_isr() + * + * @brief Handles Mail-box interrupts for CEE module. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return void + */ + +void +bfa_cee_isr(void *cbarg, struct bfi_mbmsg *m) +{ + union bfi_cee_i2h_msg_u *msg; + struct bfi_cee_get_rsp *get_rsp; + struct bfa_cee *cee = (struct bfa_cee *) cbarg; + msg = (union bfi_cee_i2h_msg_u *) m; + get_rsp = (struct bfi_cee_get_rsp *) m; + switch (msg->mh.msg_id) { + case BFI_CEE_I2H_GET_CFG_RSP: + bfa_cee_get_attr_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_GET_STATS_RSP: + bfa_cee_get_stats_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_RESET_STATS_RSP: + bfa_cee_reset_stats_isr(cee, get_rsp->cmd_status); + break; + default: + BUG_ON(1); + } +} + +/** + * bfa_cee_hbfail() + * + * @brief CEE module heart-beat failure handler. + * + * @param[in] Pointer to the CEE module data structure.
+ * + * @return void + */ + +void +bfa_cee_hbfail(void *arg) +{ + struct bfa_cee *cee; + cee = (struct bfa_cee *) arg; + + if (cee->get_attr_pending == true) { + cee->get_attr_status = BFA_STATUS_FAILED; + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) { + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->get_stats_pending == true) { + cee->get_stats_status = BFA_STATUS_FAILED; + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) { + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->reset_stats_pending == true) { + cee->reset_stats_status = BFA_STATUS_FAILED; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) { + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, + BFA_STATUS_FAILED); + } + } +} + +/** + * bfa_cee_attach() + * + * @brief CEE module-attach API + * + * @param[in] cee - Pointer to the CEE module data structure + * ioc - Pointer to the ioc module data structure + * dev - Pointer to the device driver module data structure + * The device driver specific mbox ISR functions have + * this pointer as one of the parameters. + * + * @return void + */ +void +bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, + void *dev) +{ + BUG_ON(!(cee != NULL)); + cee->dev = dev; + cee->ioc = ioc; + + bfa_ioc_mbox_regisr(cee->ioc, BFI_MC_CEE, bfa_cee_isr, cee); + bfa_ioc_hbfail_init(&cee->hbfail, bfa_cee_hbfail, cee); + bfa_ioc_hbfail_register(cee->ioc, &cee->hbfail); +} + +/** + * bfa_cee_detach() + * + * @brief CEE module-detach API + * + * @param[in] cee - Pointer to the CEE module data structure + * + * @return void + */ +void +bfa_cee_detach(struct bfa_cee *cee) +{ +} diff --git a/drivers/net/bna/bfa_cee.h b/drivers/net/bna/bfa_cee.h new file mode 100644 index 000000000000..1208cadeceed --- /dev/null +++ b/drivers/net/bna/bfa_cee.h @@ -0,0 +1,72 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_CEE_H__ +#define __BFA_CEE_H__ + +#include "bfa_defs_cna.h" +#include "bfa_ioc.h" + +typedef void (*bfa_cee_get_attr_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_get_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_reset_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_hbfail_cbfn_t) (void *dev, enum bfa_status status); + +struct bfa_cee_cbfn { + bfa_cee_get_attr_cbfn_t get_attr_cbfn; + void *get_attr_cbarg; + bfa_cee_get_stats_cbfn_t get_stats_cbfn; + void *get_stats_cbarg; + bfa_cee_reset_stats_cbfn_t reset_stats_cbfn; + void *reset_stats_cbarg; +}; + +struct bfa_cee { + void *dev; + bool get_attr_pending; + bool get_stats_pending; + bool reset_stats_pending; + enum bfa_status get_attr_status; + enum bfa_status get_stats_status; + enum bfa_status reset_stats_status; + struct bfa_cee_cbfn cbfn; + struct bfa_ioc_hbfail_notify hbfail; + struct bfa_cee_attr *attr; + struct bfa_cee_stats *stats; + struct bfa_dma attr_dma; + struct bfa_dma stats_dma; + struct bfa_ioc *ioc; + struct bfa_mbox_cmd get_cfg_mb; + struct bfa_mbox_cmd get_stats_mb; + struct bfa_mbox_cmd reset_stats_mb; +}; + +u32 bfa_cee_meminfo(void); +void bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, + u64 dma_pa); +void bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, void *dev); +void bfa_cee_detach(struct bfa_cee *cee); +enum bfa_status bfa_cee_get_attr(struct bfa_cee *cee, + struct bfa_cee_attr *attr, bfa_cee_get_attr_cbfn_t cbfn, void *cbarg); +enum bfa_status bfa_cee_get_stats(struct bfa_cee *cee, + struct bfa_cee_stats *stats, bfa_cee_get_stats_cbfn_t cbfn, + void *cbarg); +enum bfa_status bfa_cee_reset_stats(struct bfa_cee *cee, + bfa_cee_reset_stats_cbfn_t cbfn, void *cbarg); + +#endif /* __BFA_CEE_H__ */ diff --git a/drivers/net/bna/bfa_defs.h b/drivers/net/bna/bfa_defs.h new file mode 100644 index 000000000000..29c1b8de2c2d --- /dev/null +++ b/drivers/net/bna/bfa_defs.h @@ -0,0 +1,243 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_H__ +#define __BFA_DEFS_H__ + +#include "cna.h" +#include "bfa_defs_status.h" +#include "bfa_defs_mfg_comm.h" + +#define BFA_STRING_32 32 +#define BFA_VERSION_LEN 64 + +/** + * ---------------------- adapter definitions ------------ + */ + +/** + * BFA adapter level attributes. 
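+ * + * Note (added for clarity): the serial number length below comes from + * STRSZ(), which pads the raw size, plus room for a NUL terminator, up + * to the next 4-byte boundary (see bfa_defs_mfg_comm.h).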
+ */ +enum { + BFA_ADAPTER_SERIAL_NUM_LEN = STRSZ(BFA_MFG_SERIALNUM_SIZE), + /* + *!< adapter serial num length + */ + BFA_ADAPTER_MODEL_NAME_LEN = 16, /*!< model name length */ + BFA_ADAPTER_MODEL_DESCR_LEN = 128, /*!< model description length */ + BFA_ADAPTER_MFG_NAME_LEN = 8, /*!< manufacturer name length */ + BFA_ADAPTER_SYM_NAME_LEN = 64, /*!< adapter symbolic name length */ + BFA_ADAPTER_OS_TYPE_LEN = 64, /*!< adapter os type length */ +}; + +struct bfa_adapter_attr { + char manufacturer[BFA_ADAPTER_MFG_NAME_LEN]; + char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; + u32 card_type; + char model[BFA_ADAPTER_MODEL_NAME_LEN]; + char model_descr[BFA_ADAPTER_MODEL_DESCR_LEN]; + u64 pwwn; + char node_symname[FC_SYMNAME_MAX]; + char hw_ver[BFA_VERSION_LEN]; + char fw_ver[BFA_VERSION_LEN]; + char optrom_ver[BFA_VERSION_LEN]; + char os_type[BFA_ADAPTER_OS_TYPE_LEN]; + struct bfa_mfg_vpd vpd; + struct mac mac; + + u8 nports; + u8 max_speed; + u8 prototype; + char asic_rev; + + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 cna_capable; + + u8 is_mezz; + u8 trunk_capable; +}; + +/** + * ---------------------- IOC definitions ------------ + */ + +enum { + BFA_IOC_DRIVER_LEN = 16, + BFA_IOC_CHIP_REV_LEN = 8, +}; + +/** + * Driver and firmware versions. + */ +struct bfa_ioc_driver_attr { + char driver[BFA_IOC_DRIVER_LEN]; /*!< driver name */ + char driver_ver[BFA_VERSION_LEN]; /*!< driver version */ + char fw_ver[BFA_VERSION_LEN]; /*!< firmware version */ + char bios_ver[BFA_VERSION_LEN]; /*!< bios version */ + char efi_ver[BFA_VERSION_LEN]; /*!< EFI version */ + char ob_ver[BFA_VERSION_LEN]; /*!< openboot version */ +}; + +/** + * IOC PCI device attributes + */ +struct bfa_ioc_pci_attr { + u16 vendor_id; /*!< PCI vendor ID */ + u16 device_id; /*!< PCI device ID */ + u16 ssid; /*!< subsystem ID */ + u16 ssvid; /*!< subsystem vendor ID */ + u32 pcifn; /*!< PCI device function */ + u32 rsvd; /* padding */ + char chip_rev[BFA_IOC_CHIP_REV_LEN]; /*!< chip revision */ +}; + +/** + * IOC states + */ +enum bfa_ioc_state { + BFA_IOC_RESET = 1, /*!< IOC is in reset state */ + BFA_IOC_SEMWAIT = 2, /*!< Waiting for IOC h/w semaphore */ + BFA_IOC_HWINIT = 3, /*!< IOC h/w is being initialized */ + BFA_IOC_GETATTR = 4, /*!< IOC is being configured */ + BFA_IOC_OPERATIONAL = 5, /*!< IOC is operational */ + BFA_IOC_INITFAIL = 6, /*!< IOC hardware failure */ + BFA_IOC_HBFAIL = 7, /*!< IOC heart-beat failure */ + BFA_IOC_DISABLING = 8, /*!< IOC is being disabled */ + BFA_IOC_DISABLED = 9, /*!< IOC is disabled */ + BFA_IOC_FWMISMATCH = 10, /*!< IOC f/w different from drivers */ +}; + +/** + * IOC firmware stats + */ +struct bfa_fw_ioc_stats { + u32 enable_reqs; + u32 disable_reqs; + u32 get_attr_reqs; + u32 dbg_sync; + u32 dbg_dump; + u32 unknown_reqs; +}; + +/** + * IOC driver stats + */ +struct bfa_ioc_drv_stats { + u32 ioc_isrs; + u32 ioc_enables; + u32 ioc_disables; + u32 ioc_hbfails; + u32 ioc_boots; + u32 stats_tmos; + u32 hb_count; + u32 disable_reqs; + u32 enable_reqs; + u32 disable_replies; + u32 enable_replies; +}; + +/** + * IOC statistics + */ +struct bfa_ioc_stats { + struct bfa_ioc_drv_stats drv_stats; /*!< driver IOC stats */ + struct bfa_fw_ioc_stats fw_stats; /*!< firmware IOC stats */ +}; + +enum bfa_ioc_type { + BFA_IOC_TYPE_FC = 1, + BFA_IOC_TYPE_FCoE = 2, + BFA_IOC_TYPE_LL = 3, +}; + +/** + * IOC attributes returned in queries + */ +struct bfa_ioc_attr { + enum bfa_ioc_type ioc_type; + enum bfa_ioc_state state; /*!< IOC state */ + struct bfa_adapter_attr adapter_attr; /*!< HBA attributes */ 
+ struct bfa_ioc_driver_attr driver_attr; /*!< driver attr */ + struct bfa_ioc_pci_attr pci_attr; + u8 port_id; /*!< port number */ + u8 rsvd[7]; /*!< 64bit align */ +}; + +/** + * ---------------------- mfg definitions ------------ + */ + +/** + * Checksum size + */ +#define BFA_MFG_CHKSUM_SIZE 16 + +#define BFA_MFG_PARTNUM_SIZE 14 +#define BFA_MFG_SUPPLIER_ID_SIZE 10 +#define BFA_MFG_SUPPLIER_PARTNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_SERIALNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_REVISION_SIZE 4 + +#pragma pack(1) + +/** + * @brief BFA adapter manufacturing block definition. + * + * All numerical fields are in big-endian format. + */ +struct bfa_mfg_block { + u8 version; /*!< manufacturing block version */ + u8 mfg_sig[3]; /*!< characters 'M', 'F', 'G' */ + u16 mfgsize; /*!< mfg block size */ + u16 u16_chksum; /*!< old u16 checksum */ + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + char brcd_partnum[STRSZ(BFA_MFG_PARTNUM_SIZE)]; + u8 mfg_day; /*!< manufacturing day */ + u8 mfg_month; /*!< manufacturing month */ + u16 mfg_year; /*!< manufacturing year */ + u64 mfg_wwn; /*!< wwn base for this adapter */ + u8 num_wwn; /*!< number of wwns assigned */ + u8 mfg_speeds; /*!< speeds allowed for this adapter */ + u8 rsv[2]; + char supplier_id[STRSZ(BFA_MFG_SUPPLIER_ID_SIZE)]; + char supplier_partnum[STRSZ(BFA_MFG_SUPPLIER_PARTNUM_SIZE)]; + char + supplier_serialnum[STRSZ(BFA_MFG_SUPPLIER_SERIALNUM_SIZE)]; + char + supplier_revision[STRSZ(BFA_MFG_SUPPLIER_REVISION_SIZE)]; + mac_t mfg_mac; /*!< mac address */ + u8 num_mac; /*!< number of mac addresses */ + u8 rsv2; + u32 mfg_type; /*!< card type */ + u8 rsv3[108]; + u8 md5_chksum[BFA_MFG_CHKSUM_SIZE]; /*!< md5 checksum */ +}; + +#pragma pack() + +/** + * ---------------------- pci definitions ------------ + */ + +#define bfa_asic_id_ct(devid) \ + ((devid) == PCI_DEVICE_ID_BROCADE_CT || \ + (devid) == PCI_DEVICE_ID_BROCADE_CT_FC) + +#endif /* __BFA_DEFS_H__ */ diff --git a/drivers/net/bna/bfa_defs_cna.h b/drivers/net/bna/bfa_defs_cna.h new file mode 100644 index 000000000000..7e0a9187bdd5 --- /dev/null +++ b/drivers/net/bna/bfa_defs_cna.h @@ -0,0 +1,223 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_CNA_H__ +#define __BFA_DEFS_CNA_H__ + +#include "bfa_defs.h" + +/** + * @brief + * FC physical port statistics. 
+ */ +struct bfa_port_fc_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 tx_frames; /*!< Tx frames */ + u64 tx_words; /*!< Tx words */ + u64 tx_lip; /*!< Tx LIP */ + u64 tx_nos; /*!< Tx NOS */ + u64 tx_ols; /*!< Tx OLS */ + u64 tx_lr; /*!< Tx LR */ + u64 tx_lrr; /*!< Tx LRR */ + u64 rx_frames; /*!< Rx frames */ + u64 rx_words; /*!< Rx words */ + u64 lip_count; /*!< Rx LIP */ + u64 nos_count; /*!< Rx NOS */ + u64 ols_count; /*!< Rx OLS */ + u64 lr_count; /*!< Rx LR */ + u64 lrr_count; /*!< Rx LRR */ + u64 invalid_crcs; /*!< Rx CRC err frames */ + u64 invalid_crc_gd_eof; /*!< Rx CRC err good EOF frames */ + u64 undersized_frm; /*!< Rx undersized frames */ + u64 oversized_frm; /*!< Rx oversized frames */ + u64 bad_eof_frm; /*!< Rx frames with bad EOF */ + u64 error_frames; /*!< Errored frames */ + u64 dropped_frames; /*!< Dropped frames */ + u64 link_failures; /*!< Link Failure (LF) count */ + u64 loss_of_syncs; /*!< Loss of sync count */ + u64 loss_of_signals; /*!< Loss of signal count */ + u64 primseq_errs; /*!< Primitive sequence protocol err. */ + u64 bad_os_count; /*!< Invalid ordered sets */ + u64 err_enc_out; /*!< Encoding err nonframe_8b10b */ + u64 err_enc; /*!< Encoding err frame_8b10b */ +}; + +/** + * @brief + * Eth Physical Port statistics. + */ +struct bfa_port_eth_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 frame_64; /*!< Frames 64 bytes */ + u64 frame_65_127; /*!< Frames 65-127 bytes */ + u64 frame_128_255; /*!< Frames 128-255 bytes */ + u64 frame_256_511; /*!< Frames 256-511 bytes */ + u64 frame_512_1023; /*!< Frames 512-1023 bytes */ + u64 frame_1024_1518; /*!< Frames 1024-1518 bytes */ + u64 frame_1519_1522; /*!< Frames 1519-1522 bytes */ + u64 tx_bytes; /*!< Tx bytes */ + u64 tx_packets; /*!< Tx packets */ + u64 tx_mcast_packets; /*!< Tx multicast packets */ + u64 tx_bcast_packets; /*!< Tx broadcast packets */ + u64 tx_control_frame; /*!< Tx control frame */ + u64 tx_drop; /*!< Tx drops */ + u64 tx_jabber; /*!< Tx jabber */ + u64 tx_fcs_error; /*!< Tx FCS errors */ + u64 tx_fragments; /*!< Tx fragments */ + u64 rx_bytes; /*!< Rx bytes */ + u64 rx_packets; /*!< Rx packets */ + u64 rx_mcast_packets; /*!< Rx multicast packets */ + u64 rx_bcast_packets; /*!< Rx broadcast packets */ + u64 rx_control_frames; /*!< Rx control frames */ + u64 rx_unknown_opcode; /*!< Rx unknown opcode */ + u64 rx_drop; /*!< Rx drops */ + u64 rx_jabber; /*!< Rx jabber */ + u64 rx_fcs_error; /*!< Rx FCS errors */ + u64 rx_alignment_error; /*!< Rx alignment errors */ + u64 rx_frame_length_error; /*!< Rx frame len errors */ + u64 rx_code_error; /*!< Rx code errors */ + u64 rx_fragments; /*!< Rx fragments */ + u64 rx_pause; /*!< Rx pause */ + u64 rx_zero_pause; /*!< Rx zero pause */ + u64 tx_pause; /*!< Tx pause */ + u64 tx_zero_pause; /*!< Tx zero pause */ + u64 rx_fcoe_pause; /*!< Rx FCoE pause */ + u64 rx_fcoe_zero_pause; /*!< Rx FCoE zero pause */ + u64 tx_fcoe_pause; /*!< Tx FCoE pause */ + u64 tx_fcoe_zero_pause; /*!< Tx FCoE zero pause */ +}; + +/** + * @brief + * Port statistics. 
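+ * + * Note (added for clarity): the union below is interpreted according to + * the port type -- FC ports report through the fc member, Ethernet (CNA) + * ports through the eth member.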
+ */ +union bfa_port_stats_u { + struct bfa_port_fc_stats fc; + struct bfa_port_eth_stats eth; +}; + +#pragma pack(1) + +#define BFA_CEE_LLDP_MAX_STRING_LEN (128) +#define BFA_CEE_DCBX_MAX_PRIORITY (8) +#define BFA_CEE_DCBX_MAX_PGID (8) + +#define BFA_CEE_LLDP_SYS_CAP_OTHER 0x0001 +#define BFA_CEE_LLDP_SYS_CAP_REPEATER 0x0002 +#define BFA_CEE_LLDP_SYS_CAP_MAC_BRIDGE 0x0004 +#define BFA_CEE_LLDP_SYS_CAP_WLAN_AP 0x0008 +#define BFA_CEE_LLDP_SYS_CAP_ROUTER 0x0010 +#define BFA_CEE_LLDP_SYS_CAP_TELEPHONE 0x0020 +#define BFA_CEE_LLDP_SYS_CAP_DOCSIS_CD 0x0040 +#define BFA_CEE_LLDP_SYS_CAP_STATION 0x0080 +#define BFA_CEE_LLDP_SYS_CAP_CVLAN 0x0100 +#define BFA_CEE_LLDP_SYS_CAP_SVLAN 0x0200 +#define BFA_CEE_LLDP_SYS_CAP_TPMR 0x0400 + +/* LLDP string type */ +struct bfa_cee_lldp_str { + u8 sub_type; + u8 len; + u8 rsvd[2]; + u8 value[BFA_CEE_LLDP_MAX_STRING_LEN]; +}; + +/* LLDP parameters */ +struct bfa_cee_lldp_cfg { + struct bfa_cee_lldp_str chassis_id; + struct bfa_cee_lldp_str port_id; + struct bfa_cee_lldp_str port_desc; + struct bfa_cee_lldp_str sys_name; + struct bfa_cee_lldp_str sys_desc; + struct bfa_cee_lldp_str mgmt_addr; + u16 time_to_live; + u16 enabled_system_cap; +}; + +enum bfa_cee_dcbx_version { + DCBX_PROTOCOL_PRECEE = 1, + DCBX_PROTOCOL_CEE = 2, +}; + +enum bfa_cee_lls { + /* LLS is down because the TLV not sent by the peer */ + CEE_LLS_DOWN_NO_TLV = 0, + /* LLS is down as advertised by the peer */ + CEE_LLS_DOWN = 1, + CEE_LLS_UP = 2, +}; + +/* CEE/DCBX parameters */ +struct bfa_cee_dcbx_cfg { + u8 pgid[BFA_CEE_DCBX_MAX_PRIORITY]; + u8 pg_percentage[BFA_CEE_DCBX_MAX_PGID]; + u8 pfc_primap; /* bitmap of priorities with PFC enabled */ + u8 fcoe_primap; /* bitmap of priorities used for FCoE traffic */ + u8 iscsi_primap; /* bitmap of priorities used for iSCSI traffic */ + u8 dcbx_version; /* operating version: CEE or preCEE */ + u8 lls_fcoe; /* FCoE Logical Link Status */ + u8 lls_lan; /* LAN Logical Link Status */ + u8 rsvd[2]; +}; + +/* CEE status */ +/* Tri-state for the benefit of the port list command */ +enum bfa_cee_status { + CEE_UP = 0, + CEE_PHY_UP = 1, + CEE_LOOPBACK = 2, + CEE_PHY_DOWN = 3, +}; + +/* CEE Query */ +struct bfa_cee_attr { + u8 cee_status; + u8 error_reason; + struct bfa_cee_lldp_cfg lldp_remote; + struct bfa_cee_dcbx_cfg dcbx_remote; + mac_t src_mac; + u8 link_speed; + u8 nw_priority; + u8 filler[2]; +}; + +/* LLDP/DCBX/CEE Statistics */ +struct bfa_cee_stats { + u32 lldp_tx_frames; /*!< LLDP Tx Frames */ + u32 lldp_rx_frames; /*!< LLDP Rx Frames */ + u32 lldp_rx_frames_invalid; /*!< LLDP Rx Frames invalid */ + u32 lldp_rx_frames_new; /*!< LLDP Rx Frames new */ + u32 lldp_tlvs_unrecognized; /*!< LLDP Rx unrecognized TLVs */ + u32 lldp_rx_shutdown_tlvs; /*!< LLDP Rx shutdown TLVs */ + u32 lldp_info_aged_out; /*!< LLDP remote info aged out */ + u32 dcbx_phylink_ups; /*!< DCBX phy link ups */ + u32 dcbx_phylink_downs; /*!< DCBX phy link downs */ + u32 dcbx_rx_tlvs; /*!< DCBX Rx TLVs */ + u32 dcbx_rx_tlvs_invalid; /*!< DCBX Rx TLVs invalid */ + u32 dcbx_control_tlv_error; /*!< DCBX control TLV errors */ + u32 dcbx_feature_tlv_error; /*!< DCBX feature TLV errors */ + u32 dcbx_cee_cfg_new; /*!< DCBX new CEE cfg rcvd */ + u32 cee_status_down; /*!< CEE status down */ + u32 cee_status_up; /*!< CEE status up */ + u32 cee_hw_cfg_changed; /*!< CEE hw cfg changed */ + u32 cee_rx_invalid_cfg; /*!< CEE invalid cfg */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_CNA_H__ */ diff --git a/drivers/net/bna/bfa_defs_mfg_comm.h b/drivers/net/bna/bfa_defs_mfg_comm.h new
file mode 100644 index 000000000000..987978fcb3fe --- /dev/null +++ b/drivers/net/bna/bfa_defs_mfg_comm.h @@ -0,0 +1,244 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_MFG_COMM_H__ +#define __BFA_DEFS_MFG_COMM_H__ + +#include "cna.h" + +/** + * Manufacturing block version + */ +#define BFA_MFG_VERSION 2 +#define BFA_MFG_VERSION_UNINIT 0xFF + +/** + * Manufacturing block encrypted version + */ +#define BFA_MFG_ENC_VER 2 + +/** + * Manufacturing block version 1 length + */ +#define BFA_MFG_VER1_LEN 128 + +/** + * Manufacturing block header length + */ +#define BFA_MFG_HDR_LEN 4 + +#define BFA_MFG_SERIALNUM_SIZE 11 +#define STRSZ(_n) (((_n) + 4) & ~3) + +/** + * Manufacturing card type + */ +enum { + BFA_MFG_TYPE_CB_MAX = 825, /*!< Crossbow card type max */ + BFA_MFG_TYPE_FC8P2 = 825, /*!< 8G 2port FC card */ + BFA_MFG_TYPE_FC8P1 = 815, /*!< 8G 1port FC card */ + BFA_MFG_TYPE_FC4P2 = 425, /*!< 4G 2port FC card */ + BFA_MFG_TYPE_FC4P1 = 415, /*!< 4G 1port FC card */ + BFA_MFG_TYPE_CNA10P2 = 1020, /*!< 10G 2port CNA card */ + BFA_MFG_TYPE_CNA10P1 = 1010, /*!< 10G 1port CNA card */ + BFA_MFG_TYPE_JAYHAWK = 804, /*!< Jayhawk mezz card */ + BFA_MFG_TYPE_WANCHESE = 1007, /*!< Wanchese mezz card */ + BFA_MFG_TYPE_ASTRA = 807, /*!< Astra mezz card */ + BFA_MFG_TYPE_LIGHTNING_P0 = 902, /*!< Lightning mezz card - old */ + BFA_MFG_TYPE_LIGHTNING = 1741, /*!< Lightning mezz card */ + BFA_MFG_TYPE_INVALID = 0, /*!< Invalid card type */ +}; + +#pragma pack(1) + +/** + * Check if 1-port card + */ +#define bfa_mfg_is_1port(type) (( \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P1)) + +/** + * Check if Mezz card + */ +#define bfa_mfg_is_mezz(type) (( \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE || \ + (type) == BFA_MFG_TYPE_ASTRA || \ + (type) == BFA_MFG_TYPE_LIGHTNING_P0 || \ + (type) == BFA_MFG_TYPE_LIGHTNING)) + +/** + * Check if the card type is valid + */ +#define bfa_mfg_is_card_type_valid(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + bfa_mfg_is_mezz(type))) + +/** + * Check if the card has old wwn/mac handling + */ +#define bfa_mfg_is_old_wwn_mac_model(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE)) + +#define bfa_mfg_increment_wwn_mac(m, i) \ +do { \ + u32 t = ((m)[0] << 16) | ((m)[1] << 8) | (m)[2]; \ + t += (i); \ + (m)[0] = (t >> 16) & 0xFF; \ + (m)[1] = (t >> 8) & 0xFF; \ + (m)[2] = t & 0xFF; \ +} while (0) + +#define bfa_mfg_adapter_prop_init_flash(card_type, prop) \ +do {
\ + switch ((card_type)) { \ + case BFA_MFG_TYPE_FC8P2: \ + case BFA_MFG_TYPE_JAYHAWK: \ + case BFA_MFG_TYPE_ASTRA: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC8P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC4P2: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_FC4P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_CNA10P2: \ + case BFA_MFG_TYPE_WANCHESE: \ + case BFA_MFG_TYPE_LIGHTNING_P0: \ + case BFA_MFG_TYPE_LIGHTNING: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + case BFA_MFG_TYPE_CNA10P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + default: \ + (prop) = BFI_ADAPTER_UNSUPP; \ + } \ +} while (0) + +enum { + CB_GPIO_TTV = (1), /*!< TTV debug capable cards */ + CB_GPIO_FC8P2 = (2), /*!< 8G 2port FC card */ + CB_GPIO_FC8P1 = (3), /*!< 8G 1port FC card */ + CB_GPIO_FC4P2 = (4), /*!< 4G 2port FC card */ + CB_GPIO_FC4P1 = (5), /*!< 4G 1port FC card */ + CB_GPIO_DFLY = (6), /*!< 8G 2port FC mezzanine card */ + CB_GPIO_PROTO = (1 << 7) /*!< 8G 2port FC prototypes */ +}; + +#define bfa_mfg_adapter_prop_init_gpio(gpio, card_type, prop) \ +do { \ + if ((gpio) & CB_GPIO_PROTO) { \ + (prop) |= BFI_ADAPTER_PROTO; \ + (gpio) &= ~CB_GPIO_PROTO; \ + } \ + switch ((gpio)) { \ + case CB_GPIO_TTV: \ + (prop) |= BFI_ADAPTER_TTV; \ + case CB_GPIO_DFLY: \ + case CB_GPIO_FC8P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P2; \ + break; \ + case CB_GPIO_FC8P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P1; \ + break; \ + case CB_GPIO_FC4P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P2; \ + break; \ + case CB_GPIO_FC4P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P1; \ + break; \ + default: \ + (prop) |= BFI_ADAPTER_UNSUPP; \ + (card_type) = BFA_MFG_TYPE_INVALID; \ + } \ +} while (0) + +/** + * VPD data length + */ +#define BFA_MFG_VPD_LEN 512 +#define BFA_MFG_VPD_LEN_INVALID 0 + +#define BFA_MFG_VPD_PCI_HDR_OFF 137 +#define BFA_MFG_VPD_PCI_VER_MASK 0x07 /*!< version mask 3 bits */ +#define BFA_MFG_VPD_PCI_VDR_MASK 0xf8 /*!< vendor mask 5 bits */ + +/** + * VPD vendor tag + */ +enum { + BFA_MFG_VPD_UNKNOWN = 0, /*!< vendor unknown */ + BFA_MFG_VPD_IBM = 1, /*!< vendor IBM */ + BFA_MFG_VPD_HP = 2, /*!< vendor HP */ + BFA_MFG_VPD_DELL = 3, /*!< vendor DELL */ + BFA_MFG_VPD_PCI_IBM = 0x08, /*!< PCI VPD IBM */ + BFA_MFG_VPD_PCI_HP = 0x10, /*!< PCI VPD HP */ + BFA_MFG_VPD_PCI_DELL = 0x20, /*!< PCI VPD DELL */ + BFA_MFG_VPD_PCI_BRCD = 0xf8, /*!< PCI VPD Brocade */ +}; + +/** + * @brief BFA adapter flash vpd data definition. + * + * All numerical fields are in big-endian format. 
+ */ +struct bfa_mfg_vpd { + u8 version; /*!< vpd data version */ + u8 vpd_sig[3]; /*!< characters 'V', 'P', 'D' */ + u8 chksum; /*!< u8 checksum */ + u8 vendor; /*!< vendor */ + u8 len; /*!< vpd data length excluding header */ + u8 rsv; + u8 data[BFA_MFG_VPD_LEN]; /*!< vpd data */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_MFG_COMM_H__ */ diff --git a/drivers/net/bna/bfa_defs_status.h b/drivers/net/bna/bfa_defs_status.h new file mode 100644 index 000000000000..af951126375c --- /dev/null +++ b/drivers/net/bna/bfa_defs_status.h @@ -0,0 +1,216 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_STATUS_H__ +#define __BFA_DEFS_STATUS_H__ + +/** + * API status return values + * + * NOTE: The error msgs are auto-generated from the comments. Only single-line + * comments are supported + */ +enum bfa_status { + BFA_STATUS_OK = 0, + BFA_STATUS_FAILED = 1, + BFA_STATUS_EINVAL = 2, + BFA_STATUS_ENOMEM = 3, + BFA_STATUS_ENOSYS = 4, + BFA_STATUS_ETIMER = 5, + BFA_STATUS_EPROTOCOL = 6, + BFA_STATUS_ENOFCPORTS = 7, + BFA_STATUS_NOFLASH = 8, + BFA_STATUS_BADFLASH = 9, + BFA_STATUS_SFP_UNSUPP = 10, + BFA_STATUS_UNKNOWN_VFID = 11, + BFA_STATUS_DATACORRUPTED = 12, + BFA_STATUS_DEVBUSY = 13, + BFA_STATUS_ABORTED = 14, + BFA_STATUS_NODEV = 15, + BFA_STATUS_HDMA_FAILED = 16, + BFA_STATUS_FLASH_BAD_LEN = 17, + BFA_STATUS_UNKNOWN_LWWN = 18, + BFA_STATUS_UNKNOWN_RWWN = 19, + BFA_STATUS_FCPT_LS_RJT = 20, + BFA_STATUS_VPORT_EXISTS = 21, + BFA_STATUS_VPORT_MAX = 22, + BFA_STATUS_UNSUPP_SPEED = 23, + BFA_STATUS_INVLD_DFSZ = 24, + BFA_STATUS_CNFG_FAILED = 25, + BFA_STATUS_CMD_NOTSUPP = 26, + BFA_STATUS_NO_ADAPTER = 27, + BFA_STATUS_LINKDOWN = 28, + BFA_STATUS_FABRIC_RJT = 29, + BFA_STATUS_UNKNOWN_VWWN = 30, + BFA_STATUS_NSLOGIN_FAILED = 31, + BFA_STATUS_NO_RPORTS = 32, + BFA_STATUS_NSQUERY_FAILED = 33, + BFA_STATUS_PORT_OFFLINE = 34, + BFA_STATUS_RPORT_OFFLINE = 35, + BFA_STATUS_TGTOPEN_FAILED = 36, + BFA_STATUS_BAD_LUNS = 37, + BFA_STATUS_IO_FAILURE = 38, + BFA_STATUS_NO_FABRIC = 39, + BFA_STATUS_EBADF = 40, + BFA_STATUS_EINTR = 41, + BFA_STATUS_EIO = 42, + BFA_STATUS_ENOTTY = 43, + BFA_STATUS_ENXIO = 44, + BFA_STATUS_EFOPEN = 45, + BFA_STATUS_VPORT_WWN_BP = 46, + BFA_STATUS_PORT_NOT_DISABLED = 47, + BFA_STATUS_BADFRMHDR = 48, + BFA_STATUS_BADFRMSZ = 49, + BFA_STATUS_MISSINGFRM = 50, + BFA_STATUS_LINKTIMEOUT = 51, + BFA_STATUS_NO_FCPIM_NEXUS = 52, + BFA_STATUS_CHECKSUM_FAIL = 53, + BFA_STATUS_GZME_FAILED = 54, + BFA_STATUS_SCSISTART_REQD = 55, + BFA_STATUS_IOC_FAILURE = 56, + BFA_STATUS_INVALID_WWN = 57, + BFA_STATUS_MISMATCH = 58, + BFA_STATUS_IOC_ENABLED = 59, + BFA_STATUS_ADAPTER_ENABLED = 60, + BFA_STATUS_IOC_NON_OP = 61, + BFA_STATUS_ADDR_MAP_FAILURE = 62, + BFA_STATUS_SAME_NAME = 63, + BFA_STATUS_PENDING = 64, + BFA_STATUS_8G_SPD = 65, + BFA_STATUS_4G_SPD = 66, + BFA_STATUS_AD_IS_ENABLE = 67, + BFA_STATUS_EINVAL_TOV = 68, + BFA_STATUS_EINVAL_QDEPTH = 69, + BFA_STATUS_VERSION_FAIL = 70, + BFA_STATUS_DIAG_BUSY =
71, + BFA_STATUS_BEACON_ON = 72, + BFA_STATUS_BEACON_OFF = 73, + BFA_STATUS_LBEACON_ON = 74, + BFA_STATUS_LBEACON_OFF = 75, + BFA_STATUS_PORT_NOT_INITED = 76, + BFA_STATUS_RPSC_ENABLED = 77, + BFA_STATUS_ENOFSAVE = 78, + BFA_STATUS_BAD_FILE = 79, + BFA_STATUS_RLIM_EN = 80, + BFA_STATUS_RLIM_DIS = 81, + BFA_STATUS_IOC_DISABLED = 82, + BFA_STATUS_ADAPTER_DISABLED = 83, + BFA_STATUS_BIOS_DISABLED = 84, + BFA_STATUS_AUTH_ENABLED = 85, + BFA_STATUS_AUTH_DISABLED = 86, + BFA_STATUS_ERROR_TRL_ENABLED = 87, + BFA_STATUS_ERROR_QOS_ENABLED = 88, + BFA_STATUS_NO_SFP_DEV = 89, + BFA_STATUS_MEMTEST_FAILED = 90, + BFA_STATUS_INVALID_DEVID = 91, + BFA_STATUS_QOS_ENABLED = 92, + BFA_STATUS_QOS_DISABLED = 93, + BFA_STATUS_INCORRECT_DRV_CONFIG = 94, + BFA_STATUS_REG_FAIL = 95, + BFA_STATUS_IM_INV_CODE = 96, + BFA_STATUS_IM_INV_VLAN = 97, + BFA_STATUS_IM_INV_ADAPT_NAME = 98, + BFA_STATUS_IM_LOW_RESOURCES = 99, + BFA_STATUS_IM_VLANID_IS_PVID = 100, + BFA_STATUS_IM_VLANID_EXISTS = 101, + BFA_STATUS_IM_FW_UPDATE_FAIL = 102, + BFA_STATUS_PORTLOG_ENABLED = 103, + BFA_STATUS_PORTLOG_DISABLED = 104, + BFA_STATUS_FILE_NOT_FOUND = 105, + BFA_STATUS_QOS_FC_ONLY = 106, + BFA_STATUS_RLIM_FC_ONLY = 107, + BFA_STATUS_CT_SPD = 108, + BFA_STATUS_LEDTEST_OP = 109, + BFA_STATUS_CEE_NOT_DN = 110, + BFA_STATUS_10G_SPD = 111, + BFA_STATUS_IM_INV_TEAM_NAME = 112, + BFA_STATUS_IM_DUP_TEAM_NAME = 113, + BFA_STATUS_IM_ADAPT_ALREADY_IN_TEAM = 114, + BFA_STATUS_IM_ADAPT_HAS_VLANS = 115, + BFA_STATUS_IM_PVID_MISMATCH = 116, + BFA_STATUS_IM_LINK_SPEED_MISMATCH = 117, + BFA_STATUS_IM_MTU_MISMATCH = 118, + BFA_STATUS_IM_RSS_MISMATCH = 119, + BFA_STATUS_IM_HDS_MISMATCH = 120, + BFA_STATUS_IM_OFFLOAD_MISMATCH = 121, + BFA_STATUS_IM_PORT_PARAMS = 122, + BFA_STATUS_IM_PORT_NOT_IN_TEAM = 123, + BFA_STATUS_IM_CANNOT_REM_PRI = 124, + BFA_STATUS_IM_MAX_PORTS_REACHED = 125, + BFA_STATUS_IM_LAST_PORT_DELETE = 126, + BFA_STATUS_IM_NO_DRIVER = 127, + BFA_STATUS_IM_MAX_VLANS_REACHED = 128, + BFA_STATUS_TOMCAT_SPD_NOT_ALLOWED = 129, + BFA_STATUS_NO_MINPORT_DRIVER = 130, + BFA_STATUS_CARD_TYPE_MISMATCH = 131, + BFA_STATUS_BAD_ASICBLK = 132, + BFA_STATUS_NO_DRIVER = 133, + BFA_STATUS_INVALID_MAC = 134, + BFA_STATUS_IM_NO_VLAN = 135, + BFA_STATUS_IM_ETH_LB_FAILED = 136, + BFA_STATUS_IM_PVID_REMOVE = 137, + BFA_STATUS_IM_PVID_EDIT = 138, + BFA_STATUS_CNA_NO_BOOT = 139, + BFA_STATUS_IM_PVID_NON_ZERO = 140, + BFA_STATUS_IM_INETCFG_LOCK_FAILED = 141, + BFA_STATUS_IM_GET_INETCFG_FAILED = 142, + BFA_STATUS_IM_NOT_BOUND = 143, + BFA_STATUS_INSUFFICIENT_PERMS = 144, + BFA_STATUS_IM_INV_VLAN_NAME = 145, + BFA_STATUS_CMD_NOTSUPP_CNA = 146, + BFA_STATUS_IM_PASSTHRU_EDIT = 147, + BFA_STATUS_IM_BIND_FAILED = 148, + BFA_STATUS_IM_UNBIND_FAILED = 149, + BFA_STATUS_IM_PORT_IN_TEAM = 150, + BFA_STATUS_IM_VLAN_NOT_FOUND = 151, + BFA_STATUS_IM_TEAM_NOT_FOUND = 152, + BFA_STATUS_IM_TEAM_CFG_NOT_ALLOWED = 153, + BFA_STATUS_PBC = 154, + BFA_STATUS_DEVID_MISSING = 155, + BFA_STATUS_BAD_FWCFG = 156, + BFA_STATUS_CREATE_FILE = 157, + BFA_STATUS_INVALID_VENDOR = 158, + BFA_STATUS_SFP_NOT_READY = 159, + BFA_STATUS_FLASH_UNINIT = 160, + BFA_STATUS_FLASH_EMPTY = 161, + BFA_STATUS_FLASH_CKFAIL = 162, + BFA_STATUS_TRUNK_UNSUPP = 163, + BFA_STATUS_TRUNK_ENABLED = 164, + BFA_STATUS_TRUNK_DISABLED = 165, + BFA_STATUS_TRUNK_ERROR_TRL_ENABLED = 166, + BFA_STATUS_BOOT_CODE_UPDATED = 167, + BFA_STATUS_BOOT_VERSION = 168, + BFA_STATUS_CARDTYPE_MISSING = 169, + BFA_STATUS_INVALID_CARDTYPE = 170, + BFA_STATUS_NO_TOPOLOGY_FOR_CNA = 171, + BFA_STATUS_IM_VLAN_OVER_TEAM_DELETE_FAILED = 172, + 
BFA_STATUS_ETHBOOT_ENABLED = 173, + BFA_STATUS_ETHBOOT_DISABLED = 174, + BFA_STATUS_IOPROFILE_OFF = 175, + BFA_STATUS_NO_PORT_INSTANCE = 176, + BFA_STATUS_BOOT_CODE_TIMEDOUT = 177, + BFA_STATUS_NO_VPORT_LOCK = 178, + BFA_STATUS_VPORT_NO_CNFG = 179, + BFA_STATUS_MAX_VAL +}; + +enum bfa_eproto_status { + BFA_EPROTO_BAD_ACCEPT = 0, + BFA_EPROTO_UNKNOWN_RSP = 1 +}; + +#endif /* __BFA_DEFS_STATUS_H__ */ diff --git a/drivers/net/bna/bfa_ioc.c b/drivers/net/bna/bfa_ioc.c new file mode 100644 index 000000000000..cdc2cb1597ec --- /dev/null +++ b/drivers/net/bna/bfa_ioc.c @@ -0,0 +1,1839 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/** + * IOC local definitions + */ + +#define bfa_ioc_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV)) +#define bfa_ioc_timer_stop(__ioc) del_timer(&(__ioc)->ioc_timer) + +#define bfa_ioc_recovery_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)) + +#define bfa_sem_timer_start(__ioc) \ + mod_timer(&(__ioc)->sem_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)) +#define bfa_sem_timer_stop(__ioc) del_timer(&(__ioc)->sem_timer) + +#define bfa_hb_timer_start(__ioc) \ + mod_timer(&(__ioc)->hb_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HB_TOV)) +#define bfa_hb_timer_stop(__ioc) del_timer(&(__ioc)->hb_timer) + +/** + * Asic specific macros : see bfa_hw_cb.c and bfa_hw_ct.c for details. 
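+ * + * Note (added for clarity): each macro below dispatches through the + * per-ASIC ops table referenced by ioc->ioc_hwif, keeping the common + * IOC code independent of the CB/CT register layouts.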
+ */ + +#define bfa_ioc_firmware_lock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_lock(__ioc)) +#define bfa_ioc_firmware_unlock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_unlock(__ioc)) +#define bfa_ioc_reg_init(__ioc) ((__ioc)->ioc_hwif->ioc_reg_init(__ioc)) +#define bfa_ioc_map_port(__ioc) ((__ioc)->ioc_hwif->ioc_map_port(__ioc)) +#define bfa_ioc_notify_hbfail(__ioc) \ + ((__ioc)->ioc_hwif->ioc_notify_hbfail(__ioc)) + +#define bfa_ioc_is_optrom(__ioc) \ + (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(__ioc)) < BFA_IOC_FWIMG_MINSZ) + +#define bfa_ioc_mbox_cmd_pending(__ioc) \ + (!list_empty(&((__ioc)->mbox_mod.cmd_q)) || \ + readl((__ioc)->ioc_regs.hfn_mbox_cmd)) + +bool bfa_auto_recover = true; + +/* + * forward declarations + */ +static void bfa_ioc_hw_sem_get(struct bfa_ioc *ioc); +static void bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc); +static void bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_send_enable(struct bfa_ioc *ioc); +static void bfa_ioc_send_disable(struct bfa_ioc *ioc); +static void bfa_ioc_send_getattr(struct bfa_ioc *ioc); +static void bfa_ioc_hb_monitor(struct bfa_ioc *ioc); +static void bfa_ioc_hb_stop(struct bfa_ioc *ioc); +static void bfa_ioc_reset(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_mbox_poll(struct bfa_ioc *ioc); +static void bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_recover(struct bfa_ioc *ioc); +static void bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc); +static void bfa_ioc_disable_comp(struct bfa_ioc *ioc); +static void bfa_ioc_lpu_stop(struct bfa_ioc *ioc); + +/** + * IOC state machine events + */ +enum ioc_event { + IOC_E_ENABLE = 1, /*!< IOC enable request */ + IOC_E_DISABLE = 2, /*!< IOC disable request */ + IOC_E_TIMEOUT = 3, /*!< f/w response timeout */ + IOC_E_FWREADY = 4, /*!< f/w initialization done */ + IOC_E_FWRSP_GETATTR = 5, /*!< IOC get attribute response */ + IOC_E_FWRSP_ENABLE = 6, /*!< enable f/w response */ + IOC_E_FWRSP_DISABLE = 7, /*!< disable f/w response */ + IOC_E_HBFAIL = 8, /*!< heartbeat failure */ + IOC_E_HWERROR = 9, /*!< hardware error interrupt */ + IOC_E_SEMLOCKED = 10, /*!< h/w semaphore is locked */ + IOC_E_DETACH = 11, /*!< driver detach cleanup */ +}; + +bfa_fsm_state_decl(bfa_ioc, reset, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, fwcheck, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, mismatch, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, semwait, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hwinit, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, enabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, getattr, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, op, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, initfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hbfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabled, struct bfa_ioc, enum ioc_event); + +static struct bfa_sm_table ioc_sm_table[] = { + {BFA_SM(bfa_ioc_sm_reset), BFA_IOC_RESET}, + {BFA_SM(bfa_ioc_sm_fwcheck), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_mismatch), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_semwait), BFA_IOC_SEMWAIT}, + {BFA_SM(bfa_ioc_sm_hwinit), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_enabling), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_getattr), BFA_IOC_GETATTR}, + {BFA_SM(bfa_ioc_sm_op), BFA_IOC_OPERATIONAL}, + {BFA_SM(bfa_ioc_sm_initfail), 
BFA_IOC_INITFAIL}, + {BFA_SM(bfa_ioc_sm_hbfail), BFA_IOC_HBFAIL}, + {BFA_SM(bfa_ioc_sm_disabling), BFA_IOC_DISABLING}, + {BFA_SM(bfa_ioc_sm_disabled), BFA_IOC_DISABLED}, +}; + +/** + * Reset entry actions -- initialize state machine + */ +static void +bfa_ioc_sm_reset_entry(struct bfa_ioc *ioc) +{ + ioc->retry_count = 0; + ioc->auto_recover = bfa_auto_recover; +} + +/** + * Beginning state. IOC is in reset state. + */ +static void +bfa_ioc_sm_reset(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + break; + + case IOC_E_DETACH: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Semaphore should be acquired for version check. + */ +static void +bfa_ioc_sm_fwcheck_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting h/w semaphore to continue with version check. + */ +static void +bfa_ioc_sm_fwcheck(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + if (bfa_ioc_firmware_lock(ioc)) { + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + } else { + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_mismatch); + } + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Notify enable completion callback and generate mismatch AEN. + */ +static void +bfa_ioc_sm_mismatch_entry(struct bfa_ioc *ioc) +{ + /** + * Provide enable completion callback and AEN notification only once. + */ + if (ioc->retry_count == 0) + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + ioc->retry_count++; + bfa_ioc_timer_start(ioc); +} + +/** + * Awaiting firmware version match. + */ +static void +bfa_ioc_sm_mismatch(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Request for semaphore. + */ +static void +bfa_ioc_sm_semwait_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting semaphore for h/w initialization. + */ +static void +bfa_ioc_sm_semwait(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hwinit_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, false); +} + +/** + * @brief + * Hardware is being initialized. Interrupts are enabled. + * Holding hardware semaphore lock.
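+ * + * Note (added for clarity): on h/w error or timeout the initialization + * is retried up to BFA_IOC_HWINIT_MAX times before the semaphore is + * released and the state machine moves to initfail.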
+ */ +static void +bfa_ioc_sm_hwinit(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWREADY: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_enabling); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, true); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_release(ioc); + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_enabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_enable(ioc); +} + +/** + * Host IOC function is being enabled, awaiting response from firmware. + * Semaphore is acquired. + */ +static void +bfa_ioc_sm_enabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_ENABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_getattr); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + writel(BFI_IOC_UNINIT, + ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_FWREADY: + bfa_ioc_send_enable(ioc); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_getattr_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_getattr(ioc); +} + +/** + * @brief + * IOC configuration in progress. Timer is active. + */ +static void +bfa_ioc_sm_getattr(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_GETATTR: + bfa_ioc_timer_stop(ioc); + bfa_ioc_check_attr_wwns(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_op); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_op_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK); + bfa_ioc_hb_monitor(ioc); +} + +static void +bfa_ioc_sm_op(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + break; + + case IOC_E_DISABLE: + bfa_ioc_hb_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabling); + break; + + case IOC_E_HWERROR: + case IOC_E_FWREADY: + /** + * Hard error or IOC recovery by other function. + * Treat it same as heartbeat failure. + */ + bfa_ioc_hb_stop(ioc); + /* !!! fall through !!! 
*/ + + case IOC_E_HBFAIL: + bfa_fsm_set_state(ioc, bfa_ioc_sm_hbfail); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_disabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_disable(ioc); +} + +/** + * IOC is being disabled + */ +static void +bfa_ioc_sm_disabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* + * !!! fall through !!! + */ + + case IOC_E_TIMEOUT: + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * IOC disable completion entry. + */ +static void +bfa_ioc_sm_disabled_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_disable_comp(ioc); +} + +static void +bfa_ioc_sm_disabled(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_DISABLE: + ioc->cbfn->disable_cbfn(ioc->bfa); + break; + + case IOC_E_FWREADY: + break; + + case IOC_E_DETACH: + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_initfail_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + bfa_ioc_timer_start(ioc); +} + +/** + * @brief + * Hardware initialization failed. + */ +static void +bfa_ioc_sm_initfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hbfail_entry(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + /** + * Mark IOC as failed in hardware and stop firmware. + */ + bfa_ioc_lpu_stop(ioc); + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + + /** + * Notify other functions on HB failure. + */ + bfa_ioc_notify_hbfail(ioc); + + /** + * Notify driver and common modules registered for notification. + */ + ioc->cbfn->hbfail_cbfn(ioc->bfa); + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } + + /** + * Flush any queued up mailbox requests. + */ + bfa_ioc_mbox_hbfail(ioc); + + /** + * Trigger auto-recovery after a delay. + */ + if (ioc->auto_recover) + mod_timer(&ioc->ioc_timer, jiffies + + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)); +} + +/** + * @brief + * IOC heartbeat failure. + */ +static void +bfa_ioc_sm_hbfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + + case IOC_E_ENABLE: + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + break; + + case IOC_E_DISABLE: + if (ioc->auto_recover) + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_FWREADY: + /** + * Recovery is already initiated by other function. + */ + break; + + case IOC_E_HWERROR: + /* + * HB failure notification, ignore. 
+ */ + break; + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * BFA IOC private functions + */ + +static void +bfa_ioc_disable_comp(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + ioc->cbfn->disable_cbfn(ioc->bfa); + + /** + * Notify common modules registered for notification. + */ + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } +} + +void +bfa_ioc_sem_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_ioc_hw_sem_get(ioc); +} + +bool +bfa_ioc_sem_get(void __iomem *sem_reg) +{ + u32 r32; + int cnt = 0; +#define BFA_SEM_SPINCNT 3000 + + r32 = readl(sem_reg); + + while (r32 && (cnt < BFA_SEM_SPINCNT)) { + cnt++; + udelay(2); + r32 = readl(sem_reg); + } + + if (r32 == 0) + return true; + + BUG_ON(!(cnt < BFA_SEM_SPINCNT)); + return false; +} + +void +bfa_ioc_sem_release(void __iomem *sem_reg) +{ + writel(1, sem_reg); +} + +static void +bfa_ioc_hw_sem_get(struct bfa_ioc *ioc) +{ + u32 r32; + + /** + * First read to the semaphore register will return 0, subsequent reads + * will return 1. Semaphore is released by writing 1 to the register + */ + r32 = readl(ioc->ioc_regs.ioc_sem_reg); + if (r32 == 0) { + bfa_fsm_send_event(ioc, IOC_E_SEMLOCKED); + return; + } + + mod_timer(&ioc->sem_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)); +} + +void +bfa_ioc_hw_sem_release(struct bfa_ioc *ioc) +{ + writel(1, ioc->ioc_regs.ioc_sem_reg); +} + +static void +bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc) +{ + del_timer(&ioc->sem_timer); +} + +/** + * @brief + * Initialize LPU local memory (aka secondary memory / SRAM) + */ +static void +bfa_ioc_lmem_init(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + int i; +#define PSS_LMEM_INIT_TIME 10000 + + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LMEM_RESET; + pss_ctl |= __PSS_LMEM_INIT_EN; + + /* + * i2c workaround 12.5khz clock + */ + pss_ctl |= __PSS_I2C_CLK_DIV(3UL); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); + + /** + * wait for memory initialization to be complete + */ + i = 0; + do { + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + i++; + } while (!(pss_ctl & __PSS_LMEM_INIT_DONE) && (i < PSS_LMEM_INIT_TIME)); + + /** + * If memory initialization is not successful, IOC timeout will catch + * such failures. + */ + BUG_ON(!(pss_ctl & __PSS_LMEM_INIT_DONE)); + + pss_ctl &= ~(__PSS_LMEM_INIT_DONE | __PSS_LMEM_INIT_EN); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_start(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Take processor out of reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LPU0_RESET; + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_stop(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Put processors in reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl |= (__PSS_LPU0_RESET | __PSS_LPU1_RESET); + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +/** + * Get driver and firmware versions. 
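+ * + * Note (added for clarity): the firmware image header is read word by + * word from IOC shared memory (smem) and byte-swapped into the + * caller-supplied bfi_ioc_image_hdr.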
+ */ +void +bfa_ioc_fwver_get(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + u32 pgnum, pgoff; + u32 loff = 0; + int i; + u32 *fwsig = (u32 *) fwhdr; + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); + i++) { + fwsig[i] = + swab32(readl((loff) + (ioc->ioc_regs.smem_page_start))); + loff += sizeof(u32); + } +} + +/** + * Returns TRUE if same. + */ +bool +bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + struct bfi_ioc_image_hdr *drv_fwhdr; + int i; + + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + for (i = 0; i < BFI_IOC_MD5SUM_SZ; i++) { + if (fwhdr->md5sum[i] != drv_fwhdr->md5sum[i]) + return false; + } + + return true; +} + +/** + * Return true if current running version is valid. Firmware signature and + * execution context (driver/bios) must match. + */ +static bool +bfa_ioc_fwver_valid(struct bfa_ioc *ioc) +{ + struct bfi_ioc_image_hdr fwhdr, *drv_fwhdr; + + /** + * If bios/efi boot (flash based) -- return true + */ + if (bfa_ioc_is_optrom(ioc)) + return true; + + bfa_ioc_fwver_get(ioc, &fwhdr); + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + if (fwhdr.signature != drv_fwhdr->signature) + return false; + + if (fwhdr.exec != drv_fwhdr->exec) + return false; + + return bfa_ioc_fwver_cmp(ioc, &fwhdr); +} + +/** + * Conditionally flush any pending message from firmware at start. + */ +static void +bfa_ioc_msgflush(struct bfa_ioc *ioc) +{ + u32 r32; + + r32 = readl(ioc->ioc_regs.lpu_mbox_cmd); + if (r32) + writel(1, ioc->ioc_regs.lpu_mbox_cmd); +} + +/** + * @img ioc_init_logic.jpg + */ +static void +bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force) +{ + enum bfi_ioc_state ioc_fwstate; + bool fwvalid; + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + if (force) + ioc_fwstate = BFI_IOC_UNINIT; + + /** + * check if firmware is valid + */ + fwvalid = (ioc_fwstate == BFI_IOC_UNINIT) ? + false : bfa_ioc_fwver_valid(ioc); + + if (!fwvalid) { + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); + return; + } + + /** + * If hardware initialization is in progress (initialized by other IOC), + * just wait for an initialization completion interrupt. + */ + if (ioc_fwstate == BFI_IOC_INITING) { + ioc->cbfn->reset_cbfn(ioc->bfa); + return; + } + + /** + * If IOC function is disabled and firmware version is same, + * just re-enable IOC. + * + * If option rom, IOC must not be in operational state. With + * convergence, IOC will be in operational state when 2nd driver + * is loaded. + */ + if (ioc_fwstate == BFI_IOC_DISABLED || + (!bfa_ioc_is_optrom(ioc) && ioc_fwstate == BFI_IOC_OP)) { + /** + * When using MSI-X any pending firmware ready event should + * be flushed. Otherwise MSI-X interrupts are not delivered. + */ + bfa_ioc_msgflush(ioc); + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + return; + } + + /** + * Initialize the h/w for any other states. 
+ */ + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); +} + +void +bfa_ioc_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_fsm_send_event(ioc, IOC_E_TIMEOUT); +} + +void +bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len) +{ + u32 *msgp = (u32 *) ioc_msg; + u32 i; + + BUG_ON(!(len <= BFI_IOC_MSGLEN_MAX)); + + /* + * first write msg to mailbox registers + */ + for (i = 0; i < len / sizeof(u32); i++) + writel(cpu_to_le32(msgp[i]), + ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + for (; i < BFI_IOC_MSGLEN_MAX / sizeof(u32); i++) + writel(0, ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + /* + * write 1 to mailbox CMD to trigger LPU event + */ + writel(1, ioc->ioc_regs.hfn_mbox_cmd); + (void) readl(ioc->ioc_regs.hfn_mbox_cmd); +} + +static void +bfa_ioc_send_enable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req enable_req; + struct timeval tv; + + bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, + bfa_ioc_portid(ioc)); + enable_req.ioc_class = ioc->ioc_mc; + do_gettimeofday(&tv); + enable_req.tv_sec = ntohl(tv.tv_sec); + bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_disable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req disable_req; + + bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, + bfa_ioc_portid(ioc)); + bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_getattr(struct bfa_ioc *ioc) +{ + struct bfi_ioc_getattr_req attr_req; + + bfi_h2i_set(attr_req.mh, BFI_MC_IOC, BFI_IOC_H2I_GETATTR_REQ, + bfa_ioc_portid(ioc)); + bfa_dma_be_addr_set(attr_req.attr_addr, ioc->attr_dma.pa); + bfa_ioc_mbox_send(ioc, &attr_req, sizeof(attr_req)); +} + +void +bfa_ioc_hb_check(void *cbarg) +{ + struct bfa_ioc *ioc = cbarg; + u32 hb_count; + + hb_count = readl(ioc->ioc_regs.heartbeat); + if (ioc->hb_count == hb_count) { + pr_crit("Firmware heartbeat failure at %d\n", hb_count); + bfa_ioc_recover(ioc); + return; + } else { + ioc->hb_count = hb_count; + } + + bfa_ioc_mbox_poll(ioc); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_monitor(struct bfa_ioc *ioc) +{ + ioc->hb_count = readl(ioc->ioc_regs.heartbeat); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_stop(struct bfa_ioc *ioc) +{ + del_timer(&ioc->hb_timer); +} + +/** + * @brief + * Initiate a full firmware download.
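+ * + * Note (added for clarity): the image is written to IOC shared memory + * word by word, fetching a fresh image chunk whenever the flash chunk + * number changes and advancing the smem page register on every page + * offset wrap; the boot type and parameter are written last.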
+ */ +static void +bfa_ioc_download_fw(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param) +{ + u32 *fwimg; + u32 pgnum, pgoff; + u32 loff = 0; + u32 chunkno = 0; + u32 i; + + /** + * Initialize LMEM first before code download + */ + bfa_ioc_lmem_init(ioc); + + /** + * Flash based firmware boot + */ + if (bfa_ioc_is_optrom(ioc)) + boot_type = BFI_BOOT_TYPE_FLASH; + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), chunkno); + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)); i++) { + if (BFA_IOC_FLASH_CHUNK_NO(i) != chunkno) { + chunkno = BFA_IOC_FLASH_CHUNK_NO(i); + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), + BFA_IOC_FLASH_CHUNK_ADDR(chunkno)); + } + + /** + * write smem + */ + writel((swab32(fwimg[BFA_IOC_FLASH_OFFSET_IN_CHUNK(i)])), + ((ioc->ioc_regs.smem_page_start) + (loff))); + + loff += sizeof(u32); + + /** + * handle page offset wrap around + */ + loff = PSS_SMEM_PGOFF(loff); + if (loff == 0) { + pgnum++; + writel(pgnum, + ioc->ioc_regs.host_page_num_fn); + } + } + + writel(bfa_ioc_smem_pgnum(ioc, 0), + ioc->ioc_regs.host_page_num_fn); + + /* + * Set boot type and boot param at the end. + */ + writel((swab32(swab32(boot_type))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_TYPE_OFF))); + writel((swab32(swab32(boot_param))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_PARAM_OFF))); +} + +static void +bfa_ioc_reset(struct bfa_ioc *ioc, bool force) +{ + bfa_ioc_hwinit(ioc, force); +} + +/** + * @brief + * Update BFA configuration from firmware configuration. + */ +static void +bfa_ioc_getattr_reply(struct bfa_ioc *ioc) +{ + struct bfi_ioc_attr *attr = ioc->attr; + + attr->adapter_prop = ntohl(attr->adapter_prop); + attr->card_type = ntohl(attr->card_type); + attr->maxfrsize = ntohs(attr->maxfrsize); + + bfa_fsm_send_event(ioc, IOC_E_FWRSP_GETATTR); +} + +/** + * Attach time initialization of mbox logic. + */ +static void +bfa_ioc_mbox_attach(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + int mc; + + INIT_LIST_HEAD(&mod->cmd_q); + for (mc = 0; mc < BFI_MC_MAX; mc++) { + mod->mbhdlr[mc].cbfn = NULL; + mod->mbhdlr[mc].cbarg = ioc->bfa; + } +} + +/** + * Mbox poll timer -- restarts any pending mailbox requests. + */ +static void +bfa_ioc_mbox_poll(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + u32 stat; + + /** + * If no command pending, do nothing + */ + if (list_empty(&mod->cmd_q)) + return; + + /** + * If previous command is not yet fetched by firmware, do nothing + */ + stat = readl(ioc->ioc_regs.hfn_mbox_cmd); + if (stat) + return; + + /** + * Enqueue command to firmware. + */ + bfa_q_deq(&mod->cmd_q, &cmd); + bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg)); +} + +/** + * Cleanup any pending requests. + */ +static void +bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + + while (!list_empty(&mod->cmd_q)) + bfa_q_deq(&mod->cmd_q, &cmd); +} + +/** + * IOC public + */ +enum bfa_status +bfa_ioc_pll_init(struct bfa_ioc *ioc) +{ + /* + * Hold semaphore so that nobody can access the chip during init. + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_init_sem_reg); + + bfa_ioc_pll_init_asic(ioc); + + ioc->pllinit = true; + /* + * release semaphore. 
+ */ + bfa_ioc_sem_release(ioc->ioc_regs.ioc_init_sem_reg); + + return BFA_STATUS_OK; +} + +/** + * Interface used by diag module to do firmware boot with memory test + * as the entry vector. + */ +void +bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, u32 boot_param) +{ + void __iomem *rb; + + bfa_ioc_stats(ioc, ioc_boots); + + if (bfa_ioc_pll_init(ioc) != BFA_STATUS_OK) + return; + + /** + * Initialize IOC state of all functions on a chip reset. + */ + rb = ioc->pcidev.pci_bar_kva; + if (boot_param == BFI_BOOT_TYPE_MEMTEST) { + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC1_STATE_REG)); + } else { + writel(BFI_IOC_INITING, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_INITING, (rb + BFA_IOC1_STATE_REG)); + } + + bfa_ioc_msgflush(ioc); + bfa_ioc_download_fw(ioc, boot_type, boot_param); + + /** + * Enable interrupts just before starting LPU + */ + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_ioc_lpu_start(ioc); +} + +/** + * Enable/disable IOC failure auto recovery. + */ +void +bfa_ioc_auto_recover(bool auto_recover) +{ + bfa_auto_recover = auto_recover; +} + +bool +bfa_ioc_is_operational(struct bfa_ioc *ioc) +{ + return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_op); +} + +bool +bfa_ioc_is_initialized(struct bfa_ioc *ioc) +{ + u32 r32 = readl(ioc->ioc_regs.ioc_fwstate); + + return ((r32 != BFI_IOC_UNINIT) && + (r32 != BFI_IOC_INITING) && + (r32 != BFI_IOC_MEMTEST)); +} + +void +bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg) +{ + u32 *msgp = mbmsg; + u32 r32; + int i; + + /** + * read the MBOX msg + */ + for (i = 0; i < (sizeof(union bfi_ioc_i2h_msg_u) / sizeof(u32)); + i++) { + r32 = readl(ioc->ioc_regs.lpu_mbox + + i * sizeof(u32)); + msgp[i] = htonl(r32); + } + + /** + * turn off mailbox interrupt by clearing mailbox status + */ + writel(1, ioc->ioc_regs.lpu_mbox_cmd); + readl(ioc->ioc_regs.lpu_mbox_cmd); +} + +void +bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *m) +{ + union bfi_ioc_i2h_msg_u *msg; + + msg = (union bfi_ioc_i2h_msg_u *) m; + + bfa_ioc_stats(ioc, ioc_isrs); + + switch (msg->mh.msg_id) { + case BFI_IOC_I2H_HBEAT: + break; + + case BFI_IOC_I2H_READY_EVENT: + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + break; + + case BFI_IOC_I2H_ENABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_ENABLE); + break; + + case BFI_IOC_I2H_DISABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_DISABLE); + break; + + case BFI_IOC_I2H_GETATTR_REPLY: + bfa_ioc_getattr_reply(ioc); + break; + + default: + BUG_ON(1); + } +} + +/** + * IOC attach time initialization and setup. + * + * @param[in] ioc memory for IOC + * @param[in] bfa driver instance structure + */ +void +bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, struct bfa_ioc_cbfn *cbfn) +{ + ioc->bfa = bfa; + ioc->cbfn = cbfn; + ioc->fcmode = false; + ioc->pllinit = false; + ioc->dbg_fwsave_once = true; + + bfa_ioc_mbox_attach(ioc); + INIT_LIST_HEAD(&ioc->hb_notify_q); + + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); +} + +/** + * Driver detach time IOC cleanup. + */ +void +bfa_ioc_detach(struct bfa_ioc *ioc) +{ + bfa_fsm_send_event(ioc, IOC_E_DETACH); +} + +/** + * Setup IOC PCI properties. 
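+ *
+ * (Copies the PCI info, derives ctdev/cna from the device id, hooks up
+ * the Catapult hw interface, then maps the port and register addresses;
+ * callers are expected to run this before any register or mailbox use.)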
+ *
+ * @param[in]	pcidev	PCI device information for this IOC
+ */
+void
+bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev,
+		 enum bfi_mclass mc)
+{
+	ioc->ioc_mc	= mc;
+	ioc->pcidev	= *pcidev;
+	ioc->ctdev	= bfa_asic_id_ct(ioc->pcidev.device_id);
+	ioc->cna	= ioc->ctdev && !ioc->fcmode;
+
+	bfa_ioc_set_ct_hwif(ioc);
+
+	bfa_ioc_map_port(ioc);
+	bfa_ioc_reg_init(ioc);
+}
+
+/**
+ * Initialize IOC dma memory
+ *
+ * @param[in]	dm_kva	kernel virtual address of IOC dma memory
+ * @param[in]	dm_pa	physical address of IOC dma memory
+ */
+void
+bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa)
+{
+	/**
+	 * dma memory for firmware attribute
+	 */
+	ioc->attr_dma.kva = dm_kva;
+	ioc->attr_dma.pa = dm_pa;
+	ioc->attr = (struct bfi_ioc_attr *) dm_kva;
+}
+
+/**
+ * Return size of dma memory required.
+ */
+u32
+bfa_ioc_meminfo(void)
+{
+	return roundup(sizeof(struct bfi_ioc_attr), BFA_DMA_ALIGN_SZ);
+}
+
+void
+bfa_ioc_enable(struct bfa_ioc *ioc)
+{
+	bfa_ioc_stats(ioc, ioc_enables);
+	ioc->dbg_fwsave_once = true;
+
+	bfa_fsm_send_event(ioc, IOC_E_ENABLE);
+}
+
+void
+bfa_ioc_disable(struct bfa_ioc *ioc)
+{
+	bfa_ioc_stats(ioc, ioc_disables);
+	bfa_fsm_send_event(ioc, IOC_E_DISABLE);
+}
+
+u32
+bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr)
+{
+	return PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, fmaddr);
+}
+
+u32
+bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr)
+{
+	return PSS_SMEM_PGOFF(fmaddr);
+}
+
+/**
+ * Register mailbox message handler functions
+ *
+ * @param[in]	ioc		IOC instance
+ * @param[in]	mcfuncs		message class handler functions
+ */
+void
+bfa_ioc_mbox_register(struct bfa_ioc *ioc, bfa_ioc_mbox_mcfunc_t *mcfuncs)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	int mc;
+
+	for (mc = 0; mc < BFI_MC_MAX; mc++)
+		mod->mbhdlr[mc].cbfn = mcfuncs[mc];
+}
+
+/**
+ * Register mailbox message handler function, to be called by common modules
+ */
+void
+bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc,
+		    bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+
+	mod->mbhdlr[mc].cbfn = cbfn;
+	mod->mbhdlr[mc].cbarg = cbarg;
+}
+
+/**
+ * Queue a mailbox command request to firmware. The command is queued
+ * when the mailbox is busy. It is the caller's responsibility to
+ * serialize calls.
+ *
+ * @param[in]	ioc	IOC instance
+ * @param[in]	cmd	Mailbox command
+ */
+void
+bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	u32 stat;
+
+	/**
+	 * If a previous command is pending, queue new command
+	 */
+	if (!list_empty(&mod->cmd_q)) {
+		list_add_tail(&cmd->qe, &mod->cmd_q);
+		return;
+	}
+
+	/**
+	 * If mailbox is busy, queue command for poll timer
+	 */
+	stat = readl(ioc->ioc_regs.hfn_mbox_cmd);
+	if (stat) {
+		list_add_tail(&cmd->qe, &mod->cmd_q);
+		return;
+	}
+
+	/**
+	 * mailbox is free -- queue command to firmware
+	 */
+	bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg));
+}
+
+/**
+ * Handle mailbox interrupts
+ */
+void
+bfa_ioc_mbox_isr(struct bfa_ioc *ioc)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	struct bfi_mbmsg m;
+	int mc;
+
+	bfa_ioc_msgget(ioc, &m);
+
+	/**
+	 * Treat IOC message class as special.
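+	 * IOC-class replies drive the IOC state machine directly through
+	 * bfa_ioc_isr() rather than a handler registered in mbhdlr[].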
+	 */
+	mc = m.mh.msg_class;
+	if (mc == BFI_MC_IOC) {
+		bfa_ioc_isr(ioc, &m);
+		return;
+	}
+
+	if ((mc >= BFI_MC_MAX) || (mod->mbhdlr[mc].cbfn == NULL))
+		return;
+
+	mod->mbhdlr[mc].cbfn(mod->mbhdlr[mc].cbarg, &m);
+}
+
+void
+bfa_ioc_error_isr(struct bfa_ioc *ioc)
+{
+	bfa_fsm_send_event(ioc, IOC_E_HWERROR);
+}
+
+void
+bfa_ioc_set_fcmode(struct bfa_ioc *ioc)
+{
+	ioc->fcmode = true;
+	ioc->port_id = bfa_ioc_pcifn(ioc);
+}
+
+/**
+ * return true if IOC is disabled
+ */
+bool
+bfa_ioc_is_disabled(struct bfa_ioc *ioc)
+{
+	return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabling) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled);
+}
+
+/**
+ * return true if IOC firmware is different.
+ */
+bool
+bfa_ioc_fw_mismatch(struct bfa_ioc *ioc)
+{
+	return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_reset) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_fwcheck) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_mismatch);
+}
+
+#define bfa_ioc_state_disabled(__sm)		\
+	(((__sm) == BFI_IOC_UNINIT) ||		\
+	((__sm) == BFI_IOC_INITING) ||		\
+	((__sm) == BFI_IOC_HWINIT) ||		\
+	((__sm) == BFI_IOC_DISABLED) ||		\
+	((__sm) == BFI_IOC_FAIL) ||		\
+	((__sm) == BFI_IOC_CFG_DISABLED))
+
+/**
+ * Check if adapter is disabled -- both IOCs should be in a disabled
+ * state.
+ */
+bool
+bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc)
+{
+	u32 ioc_state;
+	void __iomem *rb = ioc->pcidev.pci_bar_kva;
+
+	if (!bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled))
+		return false;
+
+	ioc_state = readl(rb + BFA_IOC0_STATE_REG);
+	if (!bfa_ioc_state_disabled(ioc_state))
+		return false;
+
+	if (ioc->pcidev.device_id != PCI_DEVICE_ID_BROCADE_FC_8G1P) {
+		ioc_state = readl(rb + BFA_IOC1_STATE_REG);
+		if (!bfa_ioc_state_disabled(ioc_state))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * Add to IOC heartbeat failure notification queue. To be used by common
+ * modules such as cee, port, diag.
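+ *
+ * (Hypothetical usage sketch -- the handler and cbarg names are
+ * illustrative only:
+ *
+ *	struct bfa_ioc_hbfail_notify hb_notify;
+ *
+ *	bfa_ioc_hbfail_init(&hb_notify, my_hbfail_cb, my_module);
+ *	bfa_ioc_hbfail_register(ioc, &hb_notify);
+ *
+ * my_hbfail_cb(my_module) then runs when a heartbeat failure is
+ * detected.)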
+ */ +void +bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify) +{ + list_add_tail(¬ify->qe, &ioc->hb_notify_q); +} + +#define BFA_MFG_NAME "Brocade" +void +bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr) +{ + struct bfi_ioc_attr *ioc_attr; + + ioc_attr = ioc->attr; + + bfa_ioc_get_adapter_serial_num(ioc, ad_attr->serial_num); + bfa_ioc_get_adapter_fw_ver(ioc, ad_attr->fw_ver); + bfa_ioc_get_adapter_optrom_ver(ioc, ad_attr->optrom_ver); + bfa_ioc_get_adapter_manufacturer(ioc, ad_attr->manufacturer); + memcpy(&ad_attr->vpd, &ioc_attr->vpd, + sizeof(struct bfa_mfg_vpd)); + + ad_attr->nports = bfa_ioc_get_nports(ioc); + ad_attr->max_speed = bfa_ioc_speed_sup(ioc); + + bfa_ioc_get_adapter_model(ioc, ad_attr->model); + /* For now, model descr uses same model string */ + bfa_ioc_get_adapter_model(ioc, ad_attr->model_descr); + + ad_attr->card_type = ioc_attr->card_type; + ad_attr->is_mezz = bfa_mfg_is_mezz(ioc_attr->card_type); + + if (BFI_ADAPTER_IS_SPECIAL(ioc_attr->adapter_prop)) + ad_attr->prototype = 1; + else + ad_attr->prototype = 0; + + ad_attr->pwwn = bfa_ioc_get_pwwn(ioc); + ad_attr->mac = bfa_ioc_get_mac(ioc); + + ad_attr->pcie_gen = ioc_attr->pcie_gen; + ad_attr->pcie_lanes = ioc_attr->pcie_lanes; + ad_attr->pcie_lanes_orig = ioc_attr->pcie_lanes_orig; + ad_attr->asic_rev = ioc_attr->asic_rev; + + bfa_ioc_get_pci_chip_rev(ioc, ad_attr->hw_ver); + + ad_attr->cna_capable = ioc->cna; + ad_attr->trunk_capable = (ad_attr->nports > 1) && !ioc->cna; +} + +enum bfa_ioc_type +bfa_ioc_get_type(struct bfa_ioc *ioc) +{ + if (!ioc->ctdev || ioc->fcmode) + return BFA_IOC_TYPE_FC; + else if (ioc->ioc_mc == BFI_MC_IOCFC) + return BFA_IOC_TYPE_FCoE; + else if (ioc->ioc_mc == BFI_MC_LL) + return BFA_IOC_TYPE_LL; + else { + BUG_ON(!(ioc->ioc_mc == BFI_MC_LL)); + return BFA_IOC_TYPE_LL; + } +} + +void +bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num) +{ + memset(serial_num, 0, BFA_ADAPTER_SERIAL_NUM_LEN); + memcpy(serial_num, + (void *)ioc->attr->brcd_serialnum, + BFA_ADAPTER_SERIAL_NUM_LEN); +} + +void +bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver) +{ + memset(fw_ver, 0, BFA_VERSION_LEN); + memcpy(fw_ver, ioc->attr->fw_version, BFA_VERSION_LEN); +} + +void +bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev) +{ + BUG_ON(!(chip_rev)); + + memset(chip_rev, 0, BFA_IOC_CHIP_REV_LEN); + + chip_rev[0] = 'R'; + chip_rev[1] = 'e'; + chip_rev[2] = 'v'; + chip_rev[3] = '-'; + chip_rev[4] = ioc->attr->asic_rev; + chip_rev[5] = '\0'; +} + +void +bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) +{ + memset(optrom_ver, 0, BFA_VERSION_LEN); + memcpy(optrom_ver, ioc->attr->optrom_version, + BFA_VERSION_LEN); +} + +void +bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) +{ + memset(manufacturer, 0, BFA_ADAPTER_MFG_NAME_LEN); + memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); +} + +void +bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model) +{ + struct bfi_ioc_attr *ioc_attr; + + BUG_ON(!(model)); + memset(model, 0, BFA_ADAPTER_MODEL_NAME_LEN); + + ioc_attr = ioc->attr; + + /** + * model name + */ + snprintf(model, BFA_ADAPTER_MODEL_NAME_LEN, "%s-%u", + BFA_MFG_NAME, ioc_attr->card_type); +} + +enum bfa_ioc_state +bfa_ioc_get_state(struct bfa_ioc *ioc) +{ + return bfa_sm_to_state(ioc_sm_table, ioc->fsm); +} + +void +bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr) +{ + memset((void *)ioc_attr, 0, sizeof(struct bfa_ioc_attr)); 
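+	/* snapshot IOC state, type, port id, adapter and PCI attributes */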
+ + ioc_attr->state = bfa_ioc_get_state(ioc); + ioc_attr->port_id = ioc->port_id; + + ioc_attr->ioc_type = bfa_ioc_get_type(ioc); + + bfa_ioc_get_adapter_attr(ioc, &ioc_attr->adapter_attr); + + ioc_attr->pci_attr.device_id = ioc->pcidev.device_id; + ioc_attr->pci_attr.pcifn = ioc->pcidev.pci_func; + bfa_ioc_get_pci_chip_rev(ioc, ioc_attr->pci_attr.chip_rev); +} + +/** + * WWN public + */ +u64 +bfa_ioc_get_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->pwwn; +} + +u64 +bfa_ioc_get_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->nwwn; +} + +u64 +bfa_ioc_get_adid(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +mac_t +bfa_ioc_get_mac(struct bfa_ioc *ioc) +{ + /* + * Currently mfg mac is used as FCoE enode mac (not configured by PBC) + */ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_FCoE) + return bfa_ioc_get_mfg_mac(ioc); + else + return ioc->attr->mac; +} + +u64 +bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +u64 +bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_nwwn; +} + +mac_t +bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc) +{ + mac_t m; + + m = ioc->attr->mfg_mac; + if (bfa_mfg_is_old_wwn_mac_model(ioc->attr->card_type)) + m.mac[MAC_ADDRLEN - 1] += bfa_ioc_pcifn(ioc); + else + bfa_mfg_increment_wwn_mac(&(m.mac[MAC_ADDRLEN-3]), + bfa_ioc_pcifn(ioc)); + + return m; +} + +bool +bfa_ioc_get_fcmode(struct bfa_ioc *ioc) +{ + return ioc->fcmode || !bfa_asic_id_ct(ioc->pcidev.device_id); +} + +/** + * Firmware failure detected. Start recovery actions. + */ +static void +bfa_ioc_recover(struct bfa_ioc *ioc) +{ + bfa_ioc_stats(ioc, ioc_hbfails); + bfa_fsm_send_event(ioc, IOC_E_HBFAIL); +} + +static void +bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc) +{ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_LL) + return; + +} diff --git a/drivers/net/bna/bfa_ioc.h b/drivers/net/bna/bfa_ioc.h new file mode 100644 index 000000000000..2e5c0adef899 --- /dev/null +++ b/drivers/net/bna/bfa_ioc.h @@ -0,0 +1,343 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_IOC_H__ +#define __BFA_IOC_H__ + +#include "bfa_sm.h" +#include "bfi.h" +#include "cna.h" + +#define BFA_IOC_TOV 3000 /* msecs */ +#define BFA_IOC_HWSEM_TOV 500 /* msecs */ +#define BFA_IOC_HB_TOV 500 /* msecs */ +#define BFA_IOC_HWINIT_MAX 2 +#define BFA_IOC_TOV_RECOVER BFA_IOC_HB_TOV + +/** + * Generic Scatter Gather Element used by driver + */ +struct bfa_sge { + u32 sg_len; + void *sg_addr; +}; + +/** + * PCI device information required by IOC + */ +struct bfa_pcidev { + int pci_slot; + u8 pci_func; + u16 device_id; + void __iomem *pci_bar_kva; +}; + +/** + * Structure used to remember the DMA-able memory block's KVA and Physical + * Address + */ +struct bfa_dma { + void *kva; /* ! Kernel virtual address */ + u64 pa; /* ! Physical address */ +}; + +#define BFA_DMA_ALIGN_SZ 256 + +/** + * smem size for Crossbow and Catapult + */ +#define BFI_SMEM_CB_SIZE 0x200000U /* ! 
2MB for crossbow */ +#define BFI_SMEM_CT_SIZE 0x280000U /* ! 2.5MB for catapult */ + +/** + * @brief BFA dma address assignment macro + */ +#define bfa_dma_addr_set(dma_addr, pa) \ + __bfa_dma_addr_set(&dma_addr, (u64)pa) + +static inline void +__bfa_dma_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) pa; + dma_addr->a32.addr_hi = (u32) (upper_32_bits(pa)); +} + +/** + * @brief BFA dma address assignment macro. (big endian format) + */ +#define bfa_dma_be_addr_set(dma_addr, pa) \ + __bfa_dma_be_addr_set(&dma_addr, (u64)pa) +static inline void +__bfa_dma_be_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) htonl(pa); + dma_addr->a32.addr_hi = (u32) htonl(upper_32_bits(pa)); +} + +struct bfa_ioc_regs { + void __iomem *hfn_mbox_cmd; + void __iomem *hfn_mbox; + void __iomem *lpu_mbox_cmd; + void __iomem *lpu_mbox; + void __iomem *pss_ctl_reg; + void __iomem *pss_err_status_reg; + void __iomem *app_pll_fast_ctl_reg; + void __iomem *app_pll_slow_ctl_reg; + void __iomem *ioc_sem_reg; + void __iomem *ioc_usage_sem_reg; + void __iomem *ioc_init_sem_reg; + void __iomem *ioc_usage_reg; + void __iomem *host_page_num_fn; + void __iomem *heartbeat; + void __iomem *ioc_fwstate; + void __iomem *ll_halt; + void __iomem *err_set; + void __iomem *shirq_isr_next; + void __iomem *shirq_msk_next; + void __iomem *smem_page_start; + u32 smem_pg0; +}; + +/** + * IOC Mailbox structures + */ +struct bfa_mbox_cmd { + struct list_head qe; + u32 msg[BFI_IOC_MSGSZ]; +}; + +/** + * IOC mailbox module + */ +typedef void (*bfa_ioc_mbox_mcfunc_t)(void *cbarg, struct bfi_mbmsg *m); +struct bfa_ioc_mbox_mod { + struct list_head cmd_q; /*!< pending mbox queue */ + int nmclass; /*!< number of handlers */ + struct { + bfa_ioc_mbox_mcfunc_t cbfn; /*!< message handlers */ + void *cbarg; + } mbhdlr[BFI_MC_MAX]; +}; + +/** + * IOC callback function interfaces + */ +typedef void (*bfa_ioc_enable_cbfn_t)(void *bfa, enum bfa_status status); +typedef void (*bfa_ioc_disable_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_hbfail_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_reset_cbfn_t)(void *bfa); +struct bfa_ioc_cbfn { + bfa_ioc_enable_cbfn_t enable_cbfn; + bfa_ioc_disable_cbfn_t disable_cbfn; + bfa_ioc_hbfail_cbfn_t hbfail_cbfn; + bfa_ioc_reset_cbfn_t reset_cbfn; +}; + +/** + * Heartbeat failure notification queue element. 
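+ * (Queued on ioc->hb_notify_q by bfa_ioc_hbfail_register(); the
+ * cbfn/cbarg pair names the callback to run on heartbeat failure.)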
+ */ +struct bfa_ioc_hbfail_notify { + struct list_head qe; + bfa_ioc_hbfail_cbfn_t cbfn; + void *cbarg; +}; + +/** + * Initialize a heartbeat failure notification structure + */ +#define bfa_ioc_hbfail_init(__notify, __cbfn, __cbarg) do { \ + (__notify)->cbfn = (__cbfn); \ + (__notify)->cbarg = (__cbarg); \ +} while (0) + +struct bfa_ioc { + bfa_fsm_t fsm; + struct bfa *bfa; + struct bfa_pcidev pcidev; + struct bfa_timer_mod *timer_mod; + struct timer_list ioc_timer; + struct timer_list sem_timer; + struct timer_list hb_timer; + u32 hb_count; + u32 retry_count; + struct list_head hb_notify_q; + void *dbg_fwsave; + int dbg_fwsave_len; + bool dbg_fwsave_once; + enum bfi_mclass ioc_mc; + struct bfa_ioc_regs ioc_regs; + struct bfa_ioc_drv_stats stats; + bool auto_recover; + bool fcmode; + bool ctdev; + bool cna; + bool pllinit; + bool stats_busy; /*!< outstanding stats */ + u8 port_id; + + struct bfa_dma attr_dma; + struct bfi_ioc_attr *attr; + struct bfa_ioc_cbfn *cbfn; + struct bfa_ioc_mbox_mod mbox_mod; + struct bfa_ioc_hwif *ioc_hwif; +}; + +struct bfa_ioc_hwif { + enum bfa_status (*ioc_pll_init) (void __iomem *rb, bool fcmode); + bool (*ioc_firmware_lock) (struct bfa_ioc *ioc); + void (*ioc_firmware_unlock) (struct bfa_ioc *ioc); + void (*ioc_reg_init) (struct bfa_ioc *ioc); + void (*ioc_map_port) (struct bfa_ioc *ioc); + void (*ioc_isr_mode_set) (struct bfa_ioc *ioc, + bool msix); + void (*ioc_notify_hbfail) (struct bfa_ioc *ioc); + void (*ioc_ownership_reset) (struct bfa_ioc *ioc); +}; + +#define bfa_ioc_pcifn(__ioc) ((__ioc)->pcidev.pci_func) +#define bfa_ioc_devid(__ioc) ((__ioc)->pcidev.device_id) +#define bfa_ioc_bar0(__ioc) ((__ioc)->pcidev.pci_bar_kva) +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_ioc_fetch_stats(__ioc, __stats) \ + (((__stats)->drv_stats) = (__ioc)->stats) +#define bfa_ioc_clr_stats(__ioc) \ + memset(&(__ioc)->stats, 0, sizeof((__ioc)->stats)) +#define bfa_ioc_maxfrsize(__ioc) ((__ioc)->attr->maxfrsize) +#define bfa_ioc_rx_bbcredit(__ioc) ((__ioc)->attr->rx_bbcredit) +#define bfa_ioc_speed_sup(__ioc) \ + BFI_ADAPTER_GETP(SPEED, (__ioc)->attr->adapter_prop) +#define bfa_ioc_get_nports(__ioc) \ + BFI_ADAPTER_GETP(NPORTS, (__ioc)->attr->adapter_prop) + +#define bfa_ioc_stats(_ioc, _stats) ((_ioc)->stats._stats++) +#define BFA_IOC_FWIMG_MINSZ (16 * 1024) +#define BFA_IOC_FWIMG_TYPE(__ioc) \ + (((__ioc)->ctdev) ? \ + (((__ioc)->fcmode) ? BFI_IMAGE_CT_FC : BFI_IMAGE_CT_CNA) : \ + BFI_IMAGE_CB_FC) +#define BFA_IOC_FW_SMEM_SIZE(__ioc) \ + (((__ioc)->ctdev) ? 
BFI_SMEM_CT_SIZE : BFI_SMEM_CB_SIZE) +#define BFA_IOC_FLASH_CHUNK_NO(off) (off / BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_OFFSET_IN_CHUNK(off) (off % BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_CHUNK_ADDR(chunkno) (chunkno * BFI_FLASH_CHUNK_SZ_WORDS) + +/** + * IOC mailbox interface + */ +void bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd); +void bfa_ioc_mbox_register(struct bfa_ioc *ioc, + bfa_ioc_mbox_mcfunc_t *mcfuncs); +void bfa_ioc_mbox_isr(struct bfa_ioc *ioc); +void bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len); +void bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg); +void bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc, + bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg); + +/** + * IOC interfaces + */ + +#define bfa_ioc_pll_init_asic(__ioc) \ + ((__ioc)->ioc_hwif->ioc_pll_init((__ioc)->pcidev.pci_bar_kva, \ + (__ioc)->fcmode)) + +enum bfa_status bfa_ioc_pll_init(struct bfa_ioc *ioc); +enum bfa_status bfa_ioc_cb_pll_init(void __iomem *rb, bool fcmode); +enum bfa_status bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode); + +#define bfa_ioc_isr_mode_set(__ioc, __msix) \ + ((__ioc)->ioc_hwif->ioc_isr_mode_set(__ioc, __msix)) +#define bfa_ioc_ownership_reset(__ioc) \ + ((__ioc)->ioc_hwif->ioc_ownership_reset(__ioc)) + +void bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc); + +void bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, + struct bfa_ioc_cbfn *cbfn); +void bfa_ioc_auto_recover(bool auto_recover); +void bfa_ioc_detach(struct bfa_ioc *ioc); +void bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, + enum bfi_mclass mc); +u32 bfa_ioc_meminfo(void); +void bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa); +void bfa_ioc_enable(struct bfa_ioc *ioc); +void bfa_ioc_disable(struct bfa_ioc *ioc); +bool bfa_ioc_intx_claim(struct bfa_ioc *ioc); + +void bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param); +void bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *msg); +void bfa_ioc_error_isr(struct bfa_ioc *ioc); +bool bfa_ioc_is_operational(struct bfa_ioc *ioc); +bool bfa_ioc_is_initialized(struct bfa_ioc *ioc); +bool bfa_ioc_is_disabled(struct bfa_ioc *ioc); +bool bfa_ioc_fw_mismatch(struct bfa_ioc *ioc); +bool bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc); +void bfa_ioc_cfg_complete(struct bfa_ioc *ioc); +enum bfa_ioc_type bfa_ioc_get_type(struct bfa_ioc *ioc); +void bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num); +void bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver); +void bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver); +void bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model); +void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, + char *manufacturer); +void bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev); +enum bfa_ioc_state bfa_ioc_get_state(struct bfa_ioc *ioc); + +void bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr); +void bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr); +u32 bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr); +u32 bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr); +void bfa_ioc_set_fcmode(struct bfa_ioc *ioc); +bool bfa_ioc_get_fcmode(struct bfa_ioc *ioc); +void bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify); +bool bfa_ioc_sem_get(void __iomem *sem_reg); +void bfa_ioc_sem_release(void __iomem *sem_reg); +void bfa_ioc_hw_sem_release(struct bfa_ioc *ioc); +void bfa_ioc_fwver_get(struct bfa_ioc *ioc, + 
struct bfi_ioc_image_hdr *fwhdr); +bool bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, + struct bfi_ioc_image_hdr *fwhdr); + +/* + * Timeout APIs + */ +void bfa_ioc_timeout(void *ioc); +void bfa_ioc_hb_check(void *ioc); +void bfa_ioc_sem_timeout(void *ioc); + +/* + * bfa mfg wwn API functions + */ +u64 bfa_ioc_get_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_adid(struct bfa_ioc *ioc); + +/* + * F/W Image Size & Chunk + */ +u32 *bfa_cb_image_get_chunk(int type, u32 off); +u32 bfa_cb_image_get_size(int type); + +#endif /* __BFA_IOC_H__ */ diff --git a/drivers/net/bna/bfa_ioc_ct.c b/drivers/net/bna/bfa_ioc_ct.c new file mode 100644 index 000000000000..870046e32c8d --- /dev/null +++ b/drivers/net/bna/bfa_ioc_ct.c @@ -0,0 +1,391 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/* + * forward declarations + */ +static bool bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_reg_init(struct bfa_ioc *ioc); +static void bfa_ioc_ct_map_port(struct bfa_ioc *ioc); +static void bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix); +static void bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc); + +struct bfa_ioc_hwif hwif_ct; + +/** + * Called from bfa_ioc_attach() to map asic specific calls. + */ +void +bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc) +{ + hwif_ct.ioc_pll_init = bfa_ioc_ct_pll_init; + hwif_ct.ioc_firmware_lock = bfa_ioc_ct_firmware_lock; + hwif_ct.ioc_firmware_unlock = bfa_ioc_ct_firmware_unlock; + hwif_ct.ioc_reg_init = bfa_ioc_ct_reg_init; + hwif_ct.ioc_map_port = bfa_ioc_ct_map_port; + hwif_ct.ioc_isr_mode_set = bfa_ioc_ct_isr_mode_set; + hwif_ct.ioc_notify_hbfail = bfa_ioc_ct_notify_hbfail; + hwif_ct.ioc_ownership_reset = bfa_ioc_ct_ownership_reset; + + ioc->ioc_hwif = &hwif_ct; +} + +/** + * Return true if firmware of current driver matches the running firmware. + */ +static bool +bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc) +{ + enum bfi_ioc_state ioc_fwstate; + u32 usecnt; + struct bfi_ioc_image_hdr fwhdr; + + /** + * Firmware match check is relevant only for CNA. + */ + if (!ioc->cna) + return true; + + /** + * If bios boot (flash based) -- do not increment usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return true; + + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + + /** + * If usage count is 0, always return TRUE. 
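+	 * (No other driver instance holds the firmware yet; claim it by
+	 * writing a use count of 1 under the usage semaphore.)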
+ */ + if (usecnt == 0) { + writel(1, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; + } + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + /** + * Use count cannot be non-zero and chip in uninitialized state. + */ + BUG_ON(!(ioc_fwstate != BFI_IOC_UNINIT)); + + /** + * Check if another driver with a different firmware is active + */ + bfa_ioc_fwver_get(ioc, &fwhdr); + if (!bfa_ioc_fwver_cmp(ioc, &fwhdr)) { + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return false; + } + + /** + * Same firmware version. Increment the reference count. + */ + usecnt++; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; +} + +static void +bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc) +{ + u32 usecnt; + + /** + * Firmware lock is relevant only for CNA. + */ + if (!ioc->cna) + return; + + /** + * If bios boot (flash based) -- do not decrement usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return; + + /** + * decrement usage count + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + BUG_ON(!(usecnt > 0)); + + usecnt--; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); +} + +/** + * Notify other functions on HB failure. + */ +static void +bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + writel(__FW_INIT_HALT_P, ioc->ioc_regs.ll_halt); + /* Wait for halt to take effect */ + readl(ioc->ioc_regs.ll_halt); + } else { + writel(__PSS_ERR_STATUS_SET, ioc->ioc_regs.err_set); + readl(ioc->ioc_regs.err_set); + } +} + +/** + * Host to LPU mailbox message addresses + */ +static struct { u32 hfn_mbox, lpu_mbox, hfn_pgn; } iocreg_fnreg[] = { + { HOSTFN0_LPU_MBOX0_0, LPU_HOSTFN0_MBOX0_0, HOST_PAGE_NUM_FN0 }, + { HOSTFN1_LPU_MBOX0_8, LPU_HOSTFN1_MBOX0_8, HOST_PAGE_NUM_FN1 }, + { HOSTFN2_LPU_MBOX0_0, LPU_HOSTFN2_MBOX0_0, HOST_PAGE_NUM_FN2 }, + { HOSTFN3_LPU_MBOX0_8, LPU_HOSTFN3_MBOX0_8, HOST_PAGE_NUM_FN3 } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 0 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p0[] = { + { HOSTFN0_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN3_MBOX0_CMD_STAT } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 1 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p1[] = { + { HOSTFN0_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN3_MBOX0_CMD_STAT } +}; + +static void +bfa_ioc_ct_reg_init(struct bfa_ioc *ioc) +{ + void __iomem *rb; + int pcifn = bfa_ioc_pcifn(ioc); + + rb = bfa_ioc_bar0(ioc); + + ioc->ioc_regs.hfn_mbox = rb + iocreg_fnreg[pcifn].hfn_mbox; + ioc->ioc_regs.lpu_mbox = rb + iocreg_fnreg[pcifn].lpu_mbox; + ioc->ioc_regs.host_page_num_fn = rb + iocreg_fnreg[pcifn].hfn_pgn; + + if (ioc->port_id == 0) { + ioc->ioc_regs.heartbeat = rb + BFA_IOC0_HBEAT_REG; + ioc->ioc_regs.ioc_fwstate = rb + BFA_IOC0_STATE_REG; + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P0; + } else { + 
ioc->ioc_regs.heartbeat = (rb + BFA_IOC1_HBEAT_REG); + ioc->ioc_regs.ioc_fwstate = (rb + BFA_IOC1_STATE_REG); + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P1; + } + + /* + * PSS control registers + */ + ioc->ioc_regs.pss_ctl_reg = (rb + PSS_CTL_REG); + ioc->ioc_regs.pss_err_status_reg = (rb + PSS_ERR_STATUS_REG); + ioc->ioc_regs.app_pll_fast_ctl_reg = (rb + APP_PLL_425_CTL_REG); + ioc->ioc_regs.app_pll_slow_ctl_reg = (rb + APP_PLL_312_CTL_REG); + + /* + * IOC semaphore registers and serialization + */ + ioc->ioc_regs.ioc_sem_reg = (rb + HOST_SEM0_REG); + ioc->ioc_regs.ioc_usage_sem_reg = (rb + HOST_SEM1_REG); + ioc->ioc_regs.ioc_init_sem_reg = (rb + HOST_SEM2_REG); + ioc->ioc_regs.ioc_usage_reg = (rb + BFA_FW_USE_COUNT); + + /** + * sram memory access + */ + ioc->ioc_regs.smem_page_start = (rb + PSS_SMEM_PAGE_START); + ioc->ioc_regs.smem_pg0 = BFI_IOC_SMEM_PG0_CT; + + /* + * err set reg : for notification of hb failure in fcmode + */ + ioc->ioc_regs.err_set = (rb + ERR_SET_REG); +} + +/** + * Initialize IOC to port mapping. + */ + +#define FNC_PERS_FN_SHIFT(__fn) ((__fn) * 8) +static void +bfa_ioc_ct_map_port(struct bfa_ioc *ioc) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32; + + /** + * For catapult, base port id on personality register and IOC type + */ + r32 = readl(rb + FNC_PERS_REG); + r32 >>= FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc)); + ioc->port_id = (r32 & __F0_PORT_MAP_MK) >> __F0_PORT_MAP_SH; + +} + +/** + * Set interrupt mode for a function: INTX or MSIX + */ +static void +bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32, mode; + + r32 = readl(rb + FNC_PERS_REG); + + mode = (r32 >> FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))) & + __F0_INTX_STATUS; + + /** + * If already in desired mode, do not change anything + */ + if (!msix && mode) + return; + + if (msix) + mode = __F0_INTX_STATUS_MSIX; + else + mode = __F0_INTX_STATUS_INTA; + + r32 &= ~(__F0_INTX_STATUS << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + r32 |= (mode << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + + writel(r32, rb + FNC_PERS_REG); +} + +/** + * Cleanup hw semaphore and usecnt registers + */ +static void +bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + writel(0, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + } + + /* + * Read the hw sem reg to make sure that it is locked + * before we clear it. If it is not locked, writing 1 + * will lock it instead of clearing it. 
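+	 * (A read of these hw semaphores acquires the lock when it happens
+	 * to be free, so the readl() below guarantees we own the semaphore
+	 * before the release that follows.)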
+ */ + readl(ioc->ioc_regs.ioc_sem_reg); + bfa_ioc_hw_sem_release(ioc); +} + +enum bfa_status +bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode) +{ + u32 pll_sclk, pll_fclk, r32; + + pll_sclk = __APP_PLL_312_LRESETN | __APP_PLL_312_ENARST | + __APP_PLL_312_RSEL200500 | __APP_PLL_312_P0_1(3U) | + __APP_PLL_312_JITLMT0_1(3U) | + __APP_PLL_312_CNTLMT0_1(1U); + pll_fclk = __APP_PLL_425_LRESETN | __APP_PLL_425_ENARST | + __APP_PLL_425_RSEL200500 | __APP_PLL_425_P0_1(3U) | + __APP_PLL_425_JITLMT0_1(3U) | + __APP_PLL_425_CNTLMT0_1(1U); + if (fcmode) { + writel(0, (rb + OP_MODE)); + writel(__APP_EMS_CMLCKSEL | + __APP_EMS_REFCKBUFEN2 | + __APP_EMS_CHANNEL_SEL, + (rb + ETH_MAC_SER_REG)); + } else { + writel(__GLOBAL_FCOE_MODE, (rb + OP_MODE)); + writel(__APP_EMS_REFCKBUFEN1, + (rb + ETH_MAC_SER_REG)); + } + writel(BFI_IOC_UNINIT, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_UNINIT, (rb + BFA_IOC1_STATE_REG)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET, + rb + APP_PLL_425_CTL_REG); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET | __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET | __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + readl(rb + HOSTFN0_INT_MSK); + udelay(2000); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(pll_sclk | + __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + if (!fcmode) { + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P0)); + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P1)); + } + r32 = readl((rb + PSS_CTL_REG)); + r32 &= ~__PSS_LMEM_RESET; + writel(r32, (rb + PSS_CTL_REG)); + udelay(1000); + if (!fcmode) { + writel(0, (rb + PMM_1T_RESET_REG_P0)); + writel(0, (rb + PMM_1T_RESET_REG_P1)); + } + + writel(__EDRAM_BISTR_START, (rb + MBIST_CTL_REG)); + udelay(1000); + r32 = readl((rb + MBIST_STAT_REG)); + writel(0, (rb + MBIST_CTL_REG)); + return BFA_STATUS_OK; +} diff --git a/drivers/net/bna/bfa_sm.h b/drivers/net/bna/bfa_sm.h new file mode 100644 index 000000000000..1d3d975d6f68 --- /dev/null +++ b/drivers/net/bna/bfa_sm.h @@ -0,0 +1,88 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfasm.h State machine defines + */ + +#ifndef __BFA_SM_H__ +#define __BFA_SM_H__ + +#include "cna.h" + +typedef void (*bfa_sm_t)(void *sm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. 
enum ioc_event + */ +#define bfa_sm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event) + +#define bfa_sm_set_state(_sm, _state) ((_sm)->sm = (bfa_sm_t)(_state)) +#define bfa_sm_send_event(_sm, _event) ((_sm)->sm((_sm), (_event))) +#define bfa_sm_get_state(_sm) ((_sm)->sm) +#define bfa_sm_cmp_state(_sm, _state) ((_sm)->sm == (bfa_sm_t)(_state)) + +/** + * For converting from state machine function to state encoding. + */ +struct bfa_sm_table { + bfa_sm_t sm; /*!< state machine function */ + int state; /*!< state machine encoding */ + char *name; /*!< state name for display */ +}; +#define BFA_SM(_sm) ((bfa_sm_t)(_sm)) + +/** + * State machine with entry actions. + */ +typedef void (*bfa_fsm_t)(void *fsm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. enum ioc_event + */ +#define bfa_fsm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event); \ + static void oc ## _sm_ ## st ## _entry(otype * fsm) + +#define bfa_fsm_set_state(_fsm, _state) do { \ + (_fsm)->fsm = (bfa_fsm_t)(_state); \ + _state ## _entry(_fsm); \ +} while (0) + +#define bfa_fsm_send_event(_fsm, _event) ((_fsm)->fsm((_fsm), (_event))) +#define bfa_fsm_get_state(_fsm) ((_fsm)->fsm) +#define bfa_fsm_cmp_state(_fsm, _state) \ + ((_fsm)->fsm == (bfa_fsm_t)(_state)) + +static inline int +bfa_sm_to_state(struct bfa_sm_table *smt, bfa_sm_t sm) +{ + int i = 0; + + while (smt[i].sm && smt[i].sm != sm) + i++; + return smt[i].state; +} +#endif diff --git a/drivers/net/bna/bfa_wc.h b/drivers/net/bna/bfa_wc.h new file mode 100644 index 000000000000..d0e4caee67b0 --- /dev/null +++ b/drivers/net/bna/bfa_wc.h @@ -0,0 +1,69 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfa_wc.h Generic wait counter. + */ + +#ifndef __BFA_WC_H__ +#define __BFA_WC_H__ + +typedef void (*bfa_wc_resume_t) (void *cbarg); + +struct bfa_wc { + bfa_wc_resume_t wc_resume; + void *wc_cbarg; + int wc_count; +}; + +static inline void +bfa_wc_up(struct bfa_wc *wc) +{ + wc->wc_count++; +} + +static inline void +bfa_wc_down(struct bfa_wc *wc) +{ + wc->wc_count--; + if (wc->wc_count == 0) + wc->wc_resume(wc->wc_cbarg); +} + +/** + * Initialize a waiting counter. + */ +static inline void +bfa_wc_init(struct bfa_wc *wc, bfa_wc_resume_t wc_resume, void *wc_cbarg) +{ + wc->wc_resume = wc_resume; + wc->wc_cbarg = wc_cbarg; + wc->wc_count = 0; + bfa_wc_up(wc); +} + +/** + * Wait for counter to reach zero + */ +static inline void +bfa_wc_wait(struct bfa_wc *wc) +{ + bfa_wc_down(wc); +} + +#endif diff --git a/drivers/net/bna/bfi.h b/drivers/net/bna/bfi.h new file mode 100644 index 000000000000..a97396811050 --- /dev/null +++ b/drivers/net/bna/bfi.h @@ -0,0 +1,392 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFI_H__ +#define __BFI_H__ + +#include "bfa_defs.h" + +#pragma pack(1) + +/** + * BFI FW image type + */ +#define BFI_FLASH_CHUNK_SZ 256 /*!< Flash chunk size */ +#define BFI_FLASH_CHUNK_SZ_WORDS (BFI_FLASH_CHUNK_SZ/sizeof(u32)) +enum { + BFI_IMAGE_CB_FC, + BFI_IMAGE_CT_FC, + BFI_IMAGE_CT_CNA, + BFI_IMAGE_MAX, +}; + +/** + * Msg header common to all msgs + */ +struct bfi_mhdr { + u8 msg_class; /*!< @ref enum bfi_mclass */ + u8 msg_id; /*!< msg opcode with in the class */ + union { + struct { + u8 rsvd; + u8 lpu_id; /*!< msg destination */ + } h2i; + u16 i2htok; /*!< token in msgs to host */ + } mtag; +}; + +#define bfi_h2i_set(_mh, _mc, _op, _lpuid) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.h2i.lpu_id = (_lpuid); \ +} while (0) + +#define bfi_i2h_set(_mh, _mc, _op, _i2htok) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.i2htok = (_i2htok); \ +} while (0) + +/* + * Message opcodes: 0-127 to firmware, 128-255 to host + */ +#define BFI_I2H_OPCODE_BASE 128 +#define BFA_I2HM(_x) ((_x) + BFI_I2H_OPCODE_BASE) + +/** + **************************************************************************** + * + * Scatter Gather Element and Page definition + * + **************************************************************************** + */ + +#define BFI_SGE_INLINE 1 +#define BFI_SGE_INLINE_MAX (BFI_SGE_INLINE + 1) + +/** + * SG Flags + */ +enum { + BFI_SGE_DATA = 0, /*!< data address, not last */ + BFI_SGE_DATA_CPL = 1, /*!< data addr, last in current page */ + BFI_SGE_DATA_LAST = 3, /*!< data address, last */ + BFI_SGE_LINK = 2, /*!< link address */ + BFI_SGE_PGDLEN = 2, /*!< cumulative data length for page */ +}; + +/** + * DMA addresses + */ +union bfi_addr_u { + struct { + u32 addr_lo; + u32 addr_hi; + } a32; +}; + +/** + * Scatter Gather Element + */ +struct bfi_sge { +#ifdef __BIGENDIAN + u32 flags:2, + rsvd:2, + sg_len:28; +#else + u32 sg_len:28, + rsvd:2, + flags:2; +#endif + union bfi_addr_u sga; +}; + +/** + * Scatter Gather Page + */ +#define BFI_SGPG_DATA_SGES 7 +#define BFI_SGPG_SGES_MAX (BFI_SGPG_DATA_SGES + 1) +#define BFI_SGPG_RSVD_WD_LEN 8 +struct bfi_sgpg { + struct bfi_sge sges[BFI_SGPG_SGES_MAX]; + u32 rsvd[BFI_SGPG_RSVD_WD_LEN]; +}; + +/* + * Large Message structure - 128 Bytes size Msgs + */ +#define BFI_LMSG_SZ 128 +#define BFI_LMSG_PL_WSZ \ + ((BFI_LMSG_SZ - sizeof(struct bfi_mhdr)) / 4) + +struct bfi_msg { + struct bfi_mhdr mhdr; + u32 pl[BFI_LMSG_PL_WSZ]; +}; + +/** + * Mailbox message structure + */ +#define BFI_MBMSG_SZ 7 +struct bfi_mbmsg { + struct bfi_mhdr mh; + u32 pl[BFI_MBMSG_SZ]; +}; + +/** + * Message Classes + */ +enum bfi_mclass { + BFI_MC_IOC = 1, /*!< IO Controller (IOC) */ + BFI_MC_DIAG = 2, /*!< Diagnostic Msgs */ + BFI_MC_FLASH = 3, /*!< Flash message class */ + BFI_MC_CEE = 4, /*!< CEE */ + BFI_MC_FCPORT = 5, /*!< FC port */ + BFI_MC_IOCFC = 6, /*!< FC - IO Controller (IOC) */ + BFI_MC_LL = 7, /*!< Link Layer */ + BFI_MC_UF = 
8, /*!< Unsolicited frame receive */ + BFI_MC_FCXP = 9, /*!< FC Transport */ + BFI_MC_LPS = 10, /*!< lport fc login services */ + BFI_MC_RPORT = 11, /*!< Remote port */ + BFI_MC_ITNIM = 12, /*!< I-T nexus (Initiator mode) */ + BFI_MC_IOIM_READ = 13, /*!< read IO (Initiator mode) */ + BFI_MC_IOIM_WRITE = 14, /*!< write IO (Initiator mode) */ + BFI_MC_IOIM_IO = 15, /*!< IO (Initiator mode) */ + BFI_MC_IOIM = 16, /*!< IO (Initiator mode) */ + BFI_MC_IOIM_IOCOM = 17, /*!< good IO completion */ + BFI_MC_TSKIM = 18, /*!< Initiator Task management */ + BFI_MC_SBOOT = 19, /*!< SAN boot services */ + BFI_MC_IPFC = 20, /*!< IP over FC Msgs */ + BFI_MC_PORT = 21, /*!< Physical port */ + BFI_MC_SFP = 22, /*!< SFP module */ + BFI_MC_MSGQ = 23, /*!< MSGQ */ + BFI_MC_ENET = 24, /*!< ENET commands/responses */ + BFI_MC_MAX = 32 +}; + +#define BFI_IOC_MAX_CQS 4 +#define BFI_IOC_MAX_CQS_ASIC 8 +#define BFI_IOC_MSGLEN_MAX 32 /* 32 bytes */ + +#define BFI_BOOT_TYPE_OFF 8 +#define BFI_BOOT_PARAM_OFF 12 + +#define BFI_BOOT_TYPE_NORMAL 0 /* param is device id */ +#define BFI_BOOT_TYPE_FLASH 1 +#define BFI_BOOT_TYPE_MEMTEST 2 + +#define BFI_BOOT_MEMTEST_RES_ADDR 0x900 +#define BFI_BOOT_MEMTEST_RES_SIG 0xA0A1A2A3 + +/** + *---------------------------------------------------------------------- + * IOC + *---------------------------------------------------------------------- + */ + +enum bfi_ioc_h2i_msgs { + BFI_IOC_H2I_ENABLE_REQ = 1, + BFI_IOC_H2I_DISABLE_REQ = 2, + BFI_IOC_H2I_GETATTR_REQ = 3, + BFI_IOC_H2I_DBG_SYNC = 4, + BFI_IOC_H2I_DBG_DUMP = 5, +}; + +enum bfi_ioc_i2h_msgs { + BFI_IOC_I2H_ENABLE_REPLY = BFA_I2HM(1), + BFI_IOC_I2H_DISABLE_REPLY = BFA_I2HM(2), + BFI_IOC_I2H_GETATTR_REPLY = BFA_I2HM(3), + BFI_IOC_I2H_READY_EVENT = BFA_I2HM(4), + BFI_IOC_I2H_HBEAT = BFA_I2HM(5), +}; + +/** + * BFI_IOC_H2I_GETATTR_REQ message + */ +struct bfi_ioc_getattr_req { + struct bfi_mhdr mh; + union bfi_addr_u attr_addr; +}; + +struct bfi_ioc_attr { + u64 mfg_pwwn; /*!< Mfg port wwn */ + u64 mfg_nwwn; /*!< Mfg node wwn */ + mac_t mfg_mac; /*!< Mfg mac */ + u16 rsvd_a; + u64 pwwn; + u64 nwwn; + mac_t mac; /*!< PBC or Mfg mac */ + u16 rsvd_b; + mac_t fcoe_mac; + u16 rsvd_c; + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 rx_bbcredit; /*!< receive buffer credits */ + u32 adapter_prop; /*!< adapter properties */ + u16 maxfrsize; /*!< max receive frame size */ + char asic_rev; + u8 rsvd_d; + char fw_version[BFA_VERSION_LEN]; + char optrom_version[BFA_VERSION_LEN]; + struct bfa_mfg_vpd vpd; + u32 card_type; /*!< card type */ +}; + +/** + * BFI_IOC_I2H_GETATTR_REPLY message + */ +struct bfi_ioc_getattr_reply { + struct bfi_mhdr mh; /*!< Common msg header */ + u8 status; /*!< cfg reply status */ + u8 rsvd[3]; +}; + +/** + * Firmware memory page offsets + */ +#define BFI_IOC_SMEM_PG0_CB (0x40) +#define BFI_IOC_SMEM_PG0_CT (0x180) + +/** + * Firmware statistic offset + */ +#define BFI_IOC_FWSTATS_OFF (0x6B40) +#define BFI_IOC_FWSTATS_SZ (4096) + +/** + * Firmware trace offset + */ +#define BFI_IOC_TRC_OFF (0x4b00) +#define BFI_IOC_TRC_ENTS 256 + +#define BFI_IOC_FW_SIGNATURE (0xbfadbfad) +#define BFI_IOC_MD5SUM_SZ 4 +struct bfi_ioc_image_hdr { + u32 signature; /*!< constant signature */ + u32 rsvd_a; + u32 exec; /*!< exec vector */ + u32 param; /*!< parameters */ + u32 rsvd_b[4]; + u32 md5sum[BFI_IOC_MD5SUM_SZ]; +}; + +/** + * BFI_IOC_I2H_READY_EVENT message + */ +struct bfi_ioc_rdy_event { + struct bfi_mhdr mh; /*!< common msg header */ + u8 init_status; /*!< init 
event status */
+	u8	rsvd[3];
+};
+
+struct bfi_ioc_hbeat {
+	struct bfi_mhdr mh;		/*!< common msg header		*/
+	u32	hb_count;		/*!< current heart beat count	*/
+};
+
+/**
+ * IOC hardware/firmware state
+ */
+enum bfi_ioc_state {
+	BFI_IOC_UNINIT		= 0,	/*!< not initialized		     */
+	BFI_IOC_INITING		= 1,	/*!< h/w is being initialized	     */
+	BFI_IOC_HWINIT		= 2,	/*!< h/w is initialized		     */
+	BFI_IOC_CFG		= 3,	/*!< IOC configuration in progress   */
+	BFI_IOC_OP		= 4,	/*!< IOC is operational		     */
+	BFI_IOC_DISABLING	= 5,	/*!< IOC is being disabled	     */
+	BFI_IOC_DISABLED	= 6,	/*!< IOC is disabled		     */
+	BFI_IOC_CFG_DISABLED	= 7,	/*!< IOC is being disabled; transient */
+	BFI_IOC_FAIL		= 8,	/*!< IOC heart-beat failure	     */
+	BFI_IOC_MEMTEST		= 9,	/*!< IOC is doing memtest	     */
+};
+
+#define BFI_IOC_ENDIAN_SIG  0x12345678
+
+enum {
+	BFI_ADAPTER_TYPE_FC	= 0x01,		/*!< FC adapters	   */
+	BFI_ADAPTER_TYPE_MK	= 0x0f0000,	/*!< adapter type mask     */
+	BFI_ADAPTER_TYPE_SH	= 16,		/*!< adapter type shift    */
+	BFI_ADAPTER_NPORTS_MK	= 0xff00,	/*!< number of ports mask  */
+	BFI_ADAPTER_NPORTS_SH	= 8,		/*!< number of ports shift */
+	BFI_ADAPTER_SPEED_MK	= 0xff,		/*!< adapter speed mask    */
+	BFI_ADAPTER_SPEED_SH	= 0,		/*!< adapter speed shift   */
+	BFI_ADAPTER_PROTO	= 0x100000,	/*!< prototype adapters    */
+	BFI_ADAPTER_TTV		= 0x200000,	/*!< TTV debug capable     */
+	BFI_ADAPTER_UNSUPP	= 0x400000,	/*!< unknown adapter type  */
+};
+
+#define BFI_ADAPTER_GETP(__prop, __adap_prop)			\
+	(((__adap_prop) & BFI_ADAPTER_ ## __prop ## _MK) >>	\
+		BFI_ADAPTER_ ## __prop ## _SH)
+#define BFI_ADAPTER_SETP(__prop, __val)				\
+	((__val) << BFI_ADAPTER_ ## __prop ## _SH)
+#define BFI_ADAPTER_IS_PROTO(__adap_type)			\
+	((__adap_type) & BFI_ADAPTER_PROTO)
+#define BFI_ADAPTER_IS_TTV(__adap_type)				\
+	((__adap_type) & BFI_ADAPTER_TTV)
+#define BFI_ADAPTER_IS_UNSUPP(__adap_type)			\
+	((__adap_type) & BFI_ADAPTER_UNSUPP)
+#define BFI_ADAPTER_IS_SPECIAL(__adap_type)			\
+	((__adap_type) & (BFI_ADAPTER_TTV | BFI_ADAPTER_PROTO |	\
+			BFI_ADAPTER_UNSUPP))
+
+/**
+ * BFI_IOC_H2I_ENABLE_REQ & BFI_IOC_H2I_DISABLE_REQ messages
+ */
+struct bfi_ioc_ctrl_req {
+	struct bfi_mhdr mh;
+	u8			ioc_class;
+	u8			rsvd[3];
+	u32		tv_sec;
+};
+
+/**
+ * BFI_IOC_I2H_ENABLE_REPLY & BFI_IOC_I2H_DISABLE_REPLY messages
+ */
+struct bfi_ioc_ctrl_reply {
+	struct bfi_mhdr mh;		/*!< Common msg header	   */
+	u8			status;		/*!< enable/disable status */
+	u8			rsvd[3];
+};
+
+#define BFI_IOC_MSGSZ   8
+/**
+ * H2I Messages
+ */
+union bfi_ioc_h2i_msg_u {
+	struct bfi_mhdr mh;
+	struct bfi_ioc_ctrl_req enable_req;
+	struct bfi_ioc_ctrl_req disable_req;
+	struct bfi_ioc_getattr_req getattr_req;
+	u32			mboxmsg[BFI_IOC_MSGSZ];
+};
+
+/**
+ * I2H Messages
+ */
+union bfi_ioc_i2h_msg_u {
+	struct bfi_mhdr mh;
+	struct bfi_ioc_rdy_event rdy_event;
+	u32			mboxmsg[BFI_IOC_MSGSZ];
+};
+
+#pragma pack()
+
+#endif /* __BFI_H__ */
diff --git a/drivers/net/bna/bfi_cna.h b/drivers/net/bna/bfi_cna.h
new file mode 100644
index 000000000000..4eecabea397b
--- /dev/null
+++ b/drivers/net/bna/bfi_cna.h
@@ -0,0 +1,199 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFI_CNA_H__ +#define __BFI_CNA_H__ + +#include "bfi.h" +#include "bfa_defs_cna.h" + +#pragma pack(1) + +enum bfi_port_h2i { + BFI_PORT_H2I_ENABLE_REQ = (1), + BFI_PORT_H2I_DISABLE_REQ = (2), + BFI_PORT_H2I_GET_STATS_REQ = (3), + BFI_PORT_H2I_CLEAR_STATS_REQ = (4), +}; + +enum bfi_port_i2h { + BFI_PORT_I2H_ENABLE_RSP = BFA_I2HM(1), + BFI_PORT_I2H_DISABLE_RSP = BFA_I2HM(2), + BFI_PORT_I2H_GET_STATS_RSP = BFA_I2HM(3), + BFI_PORT_I2H_CLEAR_STATS_RSP = BFA_I2HM(4), +}; + +/** + * Generic REQ type + */ +struct bfi_port_generic_req { + struct bfi_mhdr mh; /*!< msg header */ + u32 msgtag; /*!< msgtag for reply */ + u32 rsvd; +}; + +/** + * Generic RSP type + */ +struct bfi_port_generic_rsp { + struct bfi_mhdr mh; /*!< common msg header */ + u8 status; /*!< port enable status */ + u8 rsvd[3]; + u32 msgtag; /*!< msgtag for reply */ +}; + +/** + * @todo + * BFI_PORT_H2I_ENABLE_REQ + */ + +/** + * @todo + * BFI_PORT_I2H_ENABLE_RSP + */ + +/** + * BFI_PORT_H2I_DISABLE_REQ + */ + +/** + * BFI_PORT_I2H_DISABLE_RSP + */ + +/** + * BFI_PORT_H2I_GET_STATS_REQ + */ +struct bfi_port_get_stats_req { + struct bfi_mhdr mh; /*!< common msg header */ + union bfi_addr_u dma_addr; +}; + +/** + * BFI_PORT_I2H_GET_STATS_RSP + */ + +/** + * BFI_PORT_H2I_CLEAR_STATS_REQ + */ + +/** + * BFI_PORT_I2H_CLEAR_STATS_RSP + */ + +union bfi_port_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_req enable_req; + struct bfi_port_generic_req disable_req; + struct bfi_port_get_stats_req getstats_req; + struct bfi_port_generic_req clearstats_req; +}; + +union bfi_port_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_rsp enable_rsp; + struct bfi_port_generic_rsp disable_rsp; + struct bfi_port_generic_rsp getstats_rsp; + struct bfi_port_generic_rsp clearstats_rsp; +}; + +/* @brief Mailbox commands from host to (DCBX/LLDP) firmware */ +enum bfi_cee_h2i_msgs { + BFI_CEE_H2I_GET_CFG_REQ = 1, + BFI_CEE_H2I_RESET_STATS = 2, + BFI_CEE_H2I_GET_STATS_REQ = 3, +}; + +/* @brief Mailbox reply and AEN messages from DCBX/LLDP firmware to host */ +enum bfi_cee_i2h_msgs { + BFI_CEE_I2H_GET_CFG_RSP = BFA_I2HM(1), + BFI_CEE_I2H_RESET_STATS_RSP = BFA_I2HM(2), + BFI_CEE_I2H_GET_STATS_RSP = BFA_I2HM(3), +}; + +/* Data structures */ + +/* + * @brief H2I command structure for resetting the stats. + * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_lldp_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief H2I command structure for resetting the stats. 
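+ * (layout-identical to bfi_lldp_reset_stats above; both carry only the
+ * common message header)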
+ * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_cee_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_CFG_REQ + */ +struct bfi_cee_get_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_CFG_RSP + */ +struct bfi_cee_get_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_STATS_REQ + */ +struct bfi_cee_stats_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_STATS_RSP + */ +struct bfi_cee_stats_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* @brief mailbox command structures from host to firmware */ +union bfi_cee_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_req get_req; + struct bfi_cee_stats_req stats_req; +}; + +/* @brief mailbox message structures from firmware to host */ +union bfi_cee_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_rsp get_rsp; + struct bfi_cee_stats_rsp stats_rsp; +}; + +#pragma pack() + +#endif /* __BFI_CNA_H__ */ diff --git a/drivers/net/bna/bfi_ctreg.h b/drivers/net/bna/bfi_ctreg.h new file mode 100644 index 000000000000..404ea351d4a1 --- /dev/null +++ b/drivers/net/bna/bfi_ctreg.h @@ -0,0 +1,637 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/* + * bfi_ctreg.h catapult host block register definitions + * + * !!! Do not edit. Auto generated. !!! 
+ */ + +#ifndef __BFI_CTREG_H__ +#define __BFI_CTREG_H__ + +#define HOSTFN0_LPU_MBOX0_0 0x00019200 +#define HOSTFN1_LPU_MBOX0_8 0x00019260 +#define LPU_HOSTFN0_MBOX0_0 0x00019280 +#define LPU_HOSTFN1_MBOX0_8 0x000192e0 +#define HOSTFN2_LPU_MBOX0_0 0x00019400 +#define HOSTFN3_LPU_MBOX0_8 0x00019460 +#define LPU_HOSTFN2_MBOX0_0 0x00019480 +#define LPU_HOSTFN3_MBOX0_8 0x000194e0 +#define HOSTFN0_INT_STATUS 0x00014000 +#define __HOSTFN0_HALT_OCCURRED 0x01000000 +#define __HOSTFN0_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN0_INT_STATUS_LVL_SH 20 +#define __HOSTFN0_INT_STATUS_LVL(_v) ((_v) << __HOSTFN0_INT_STATUS_LVL_SH) +#define __HOSTFN0_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN0_INT_STATUS_P_SH 16 +#define __HOSTFN0_INT_STATUS_P(_v) ((_v) << __HOSTFN0_INT_STATUS_P_SH) +#define __HOSTFN0_INT_STATUS_F 0x0000ffff +#define HOSTFN0_INT_MSK 0x00014004 +#define HOST_PAGE_NUM_FN0 0x00014008 +#define __HOST_PAGE_NUM_FN 0x000001ff +#define HOST_MSIX_ERR_INDEX_FN0 0x0001400c +#define __MSIX_ERR_INDEX_FN 0x000001ff +#define HOSTFN1_INT_STATUS 0x00014100 +#define __HOSTFN1_HALT_OCCURRED 0x01000000 +#define __HOSTFN1_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN1_INT_STATUS_LVL_SH 20 +#define __HOSTFN1_INT_STATUS_LVL(_v) ((_v) << __HOSTFN1_INT_STATUS_LVL_SH) +#define __HOSTFN1_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN1_INT_STATUS_P_SH 16 +#define __HOSTFN1_INT_STATUS_P(_v) ((_v) << __HOSTFN1_INT_STATUS_P_SH) +#define __HOSTFN1_INT_STATUS_F 0x0000ffff +#define HOSTFN1_INT_MSK 0x00014104 +#define HOST_PAGE_NUM_FN1 0x00014108 +#define HOST_MSIX_ERR_INDEX_FN1 0x0001410c +#define APP_PLL_425_CTL_REG 0x00014204 +#define __P_425_PLL_LOCK 0x80000000 +#define __APP_PLL_425_SRAM_USE_100MHZ 0x00100000 +#define __APP_PLL_425_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_425_RESET_TIMER_SH 17 +#define __APP_PLL_425_RESET_TIMER(_v) ((_v) << __APP_PLL_425_RESET_TIMER_SH) +#define __APP_PLL_425_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_425_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_425_CNTLMT0_1_SH 14 +#define __APP_PLL_425_CNTLMT0_1(_v) ((_v) << __APP_PLL_425_CNTLMT0_1_SH) +#define __APP_PLL_425_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_425_JITLMT0_1_SH 12 +#define __APP_PLL_425_JITLMT0_1(_v) ((_v) << __APP_PLL_425_JITLMT0_1_SH) +#define __APP_PLL_425_HREF 0x00000800 +#define __APP_PLL_425_HDIV 0x00000400 +#define __APP_PLL_425_P0_1_MK 0x00000300 +#define __APP_PLL_425_P0_1_SH 8 +#define __APP_PLL_425_P0_1(_v) ((_v) << __APP_PLL_425_P0_1_SH) +#define __APP_PLL_425_Z0_2_MK 0x000000e0 +#define __APP_PLL_425_Z0_2_SH 5 +#define __APP_PLL_425_Z0_2(_v) ((_v) << __APP_PLL_425_Z0_2_SH) +#define __APP_PLL_425_RSEL200500 0x00000010 +#define __APP_PLL_425_ENARST 0x00000008 +#define __APP_PLL_425_BYPASS 0x00000004 +#define __APP_PLL_425_LRESETN 0x00000002 +#define __APP_PLL_425_ENABLE 0x00000001 +#define APP_PLL_312_CTL_REG 0x00014208 +#define __P_312_PLL_LOCK 0x80000000 +#define __ENABLE_MAC_AHB_1 0x00800000 +#define __ENABLE_MAC_AHB_0 0x00400000 +#define __ENABLE_MAC_1 0x00200000 +#define __ENABLE_MAC_0 0x00100000 +#define __APP_PLL_312_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_312_RESET_TIMER_SH 17 +#define __APP_PLL_312_RESET_TIMER(_v) ((_v) << __APP_PLL_312_RESET_TIMER_SH) +#define __APP_PLL_312_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_312_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_312_CNTLMT0_1_SH 14 +#define __APP_PLL_312_CNTLMT0_1(_v) ((_v) << __APP_PLL_312_CNTLMT0_1_SH) +#define __APP_PLL_312_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_312_JITLMT0_1_SH 12 +#define __APP_PLL_312_JITLMT0_1(_v) ((_v) << 
__APP_PLL_312_JITLMT0_1_SH) +#define __APP_PLL_312_HREF 0x00000800 +#define __APP_PLL_312_HDIV 0x00000400 +#define __APP_PLL_312_P0_1_MK 0x00000300 +#define __APP_PLL_312_P0_1_SH 8 +#define __APP_PLL_312_P0_1(_v) ((_v) << __APP_PLL_312_P0_1_SH) +#define __APP_PLL_312_Z0_2_MK 0x000000e0 +#define __APP_PLL_312_Z0_2_SH 5 +#define __APP_PLL_312_Z0_2(_v) ((_v) << __APP_PLL_312_Z0_2_SH) +#define __APP_PLL_312_RSEL200500 0x00000010 +#define __APP_PLL_312_ENARST 0x00000008 +#define __APP_PLL_312_BYPASS 0x00000004 +#define __APP_PLL_312_LRESETN 0x00000002 +#define __APP_PLL_312_ENABLE 0x00000001 +#define MBIST_CTL_REG 0x00014220 +#define __EDRAM_BISTR_START 0x00000004 +#define __MBIST_RESET 0x00000002 +#define __MBIST_START 0x00000001 +#define MBIST_STAT_REG 0x00014224 +#define __EDRAM_BISTR_STATUS 0x00000008 +#define __EDRAM_BISTR_DONE 0x00000004 +#define __MEM_BIT_STATUS 0x00000002 +#define __MBIST_DONE 0x00000001 +#define HOST_SEM0_REG 0x00014230 +#define __HOST_SEMAPHORE 0x00000001 +#define HOST_SEM1_REG 0x00014234 +#define HOST_SEM2_REG 0x00014238 +#define HOST_SEM3_REG 0x0001423c +#define HOST_SEM0_INFO_REG 0x00014240 +#define HOST_SEM1_INFO_REG 0x00014244 +#define HOST_SEM2_INFO_REG 0x00014248 +#define HOST_SEM3_INFO_REG 0x0001424c +#define ETH_MAC_SER_REG 0x00014288 +#define __APP_EMS_CKBUFAMPIN 0x00000020 +#define __APP_EMS_REFCLKSEL 0x00000010 +#define __APP_EMS_CMLCKSEL 0x00000008 +#define __APP_EMS_REFCKBUFEN2 0x00000004 +#define __APP_EMS_REFCKBUFEN1 0x00000002 +#define __APP_EMS_CHANNEL_SEL 0x00000001 +#define HOSTFN2_INT_STATUS 0x00014300 +#define __HOSTFN2_HALT_OCCURRED 0x01000000 +#define __HOSTFN2_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN2_INT_STATUS_LVL_SH 20 +#define __HOSTFN2_INT_STATUS_LVL(_v) ((_v) << __HOSTFN2_INT_STATUS_LVL_SH) +#define __HOSTFN2_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN2_INT_STATUS_P_SH 16 +#define __HOSTFN2_INT_STATUS_P(_v) ((_v) << __HOSTFN2_INT_STATUS_P_SH) +#define __HOSTFN2_INT_STATUS_F 0x0000ffff +#define HOSTFN2_INT_MSK 0x00014304 +#define HOST_PAGE_NUM_FN2 0x00014308 +#define HOST_MSIX_ERR_INDEX_FN2 0x0001430c +#define HOSTFN3_INT_STATUS 0x00014400 +#define __HALT_OCCURRED 0x01000000 +#define __HOSTFN3_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN3_INT_STATUS_LVL_SH 20 +#define __HOSTFN3_INT_STATUS_LVL(_v) ((_v) << __HOSTFN3_INT_STATUS_LVL_SH) +#define __HOSTFN3_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN3_INT_STATUS_P_SH 16 +#define __HOSTFN3_INT_STATUS_P(_v) ((_v) << __HOSTFN3_INT_STATUS_P_SH) +#define __HOSTFN3_INT_STATUS_F 0x0000ffff +#define HOSTFN3_INT_MSK 0x00014404 +#define HOST_PAGE_NUM_FN3 0x00014408 +#define HOST_MSIX_ERR_INDEX_FN3 0x0001440c +#define FNC_ID_REG 0x00014600 +#define __FUNCTION_NUMBER 0x00000007 +#define FNC_PERS_REG 0x00014604 +#define __F3_FUNCTION_ACTIVE 0x80000000 +#define __F3_FUNCTION_MODE 0x40000000 +#define __F3_PORT_MAP_MK 0x30000000 +#define __F3_PORT_MAP_SH 28 +#define __F3_PORT_MAP(_v) ((_v) << __F3_PORT_MAP_SH) +#define __F3_VM_MODE 0x08000000 +#define __F3_INTX_STATUS_MK 0x07000000 +#define __F3_INTX_STATUS_SH 24 +#define __F3_INTX_STATUS(_v) ((_v) << __F3_INTX_STATUS_SH) +#define __F2_FUNCTION_ACTIVE 0x00800000 +#define __F2_FUNCTION_MODE 0x00400000 +#define __F2_PORT_MAP_MK 0x00300000 +#define __F2_PORT_MAP_SH 20 +#define __F2_PORT_MAP(_v) ((_v) << __F2_PORT_MAP_SH) +#define __F2_VM_MODE 0x00080000 +#define __F2_INTX_STATUS_MK 0x00070000 +#define __F2_INTX_STATUS_SH 16 +#define __F2_INTX_STATUS(_v) ((_v) << __F2_INTX_STATUS_SH) +#define __F1_FUNCTION_ACTIVE 0x00008000 +#define __F1_FUNCTION_MODE 
0x00004000 +#define __F1_PORT_MAP_MK 0x00003000 +#define __F1_PORT_MAP_SH 12 +#define __F1_PORT_MAP(_v) ((_v) << __F1_PORT_MAP_SH) +#define __F1_VM_MODE 0x00000800 +#define __F1_INTX_STATUS_MK 0x00000700 +#define __F1_INTX_STATUS_SH 8 +#define __F1_INTX_STATUS(_v) ((_v) << __F1_INTX_STATUS_SH) +#define __F0_FUNCTION_ACTIVE 0x00000080 +#define __F0_FUNCTION_MODE 0x00000040 +#define __F0_PORT_MAP_MK 0x00000030 +#define __F0_PORT_MAP_SH 4 +#define __F0_PORT_MAP(_v) ((_v) << __F0_PORT_MAP_SH) +#define __F0_VM_MODE 0x00000008 +#define __F0_INTX_STATUS 0x00000007 +enum { + __F0_INTX_STATUS_MSIX = 0x0, + __F0_INTX_STATUS_INTA = 0x1, + __F0_INTX_STATUS_INTB = 0x2, + __F0_INTX_STATUS_INTC = 0x3, + __F0_INTX_STATUS_INTD = 0x4, +}; +#define OP_MODE 0x0001460c +#define __APP_ETH_CLK_LOWSPEED 0x00000004 +#define __GLOBAL_CORECLK_HALFSPEED 0x00000002 +#define __GLOBAL_FCOE_MODE 0x00000001 +#define HOST_SEM4_REG 0x00014610 +#define HOST_SEM5_REG 0x00014614 +#define HOST_SEM6_REG 0x00014618 +#define HOST_SEM7_REG 0x0001461c +#define HOST_SEM4_INFO_REG 0x00014620 +#define HOST_SEM5_INFO_REG 0x00014624 +#define HOST_SEM6_INFO_REG 0x00014628 +#define HOST_SEM7_INFO_REG 0x0001462c +#define HOSTFN0_LPU0_MBOX0_CMD_STAT 0x00019000 +#define __HOSTFN0_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU0_MBOX0_INFO_SH) +#define __HOSTFN0_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN0_LPU1_MBOX0_CMD_STAT 0x00019004 +#define __HOSTFN0_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU1_MBOX0_INFO_SH) +#define __HOSTFN0_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN0_MBOX0_CMD_STAT 0x00019008 +#define __LPU0_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN0_MBOX0_INFO_SH) +#define __LPU0_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN0_MBOX0_CMD_STAT 0x0001900c +#define __LPU1_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN0_MBOX0_INFO_SH) +#define __LPU1_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU0_MBOX0_CMD_STAT 0x00019010 +#define __HOSTFN1_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU0_MBOX0_INFO_SH) +#define __HOSTFN1_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU1_MBOX0_CMD_STAT 0x00019014 +#define __HOSTFN1_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU1_MBOX0_INFO_SH) +#define __HOSTFN1_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN1_MBOX0_CMD_STAT 0x00019018 +#define __LPU0_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN1_MBOX0_INFO_SH) +#define __LPU0_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN1_MBOX0_CMD_STAT 0x0001901c +#define __LPU1_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN1_MBOX0_INFO_SH) +#define __LPU1_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN2_LPU0_MBOX0_CMD_STAT 0x00019150 +#define __HOSTFN2_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN2_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN2_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU0_MBOX0_INFO_SH) +#define 
__HOSTFN2_LPU0_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN2_LPU1_MBOX0_CMD_STAT 0x00019154
+#define __HOSTFN2_LPU1_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN2_LPU1_MBOX0_INFO_SH 1
+#define __HOSTFN2_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU1_MBOX0_INFO_SH)
+#define __HOSTFN2_LPU1_MBOX0_CMD_STATUS 0x00000001
+#define LPU0_HOSTFN2_MBOX0_CMD_STAT 0x00019158
+#define __LPU0_HOSTFN2_MBOX0_INFO_MK 0xfffffffe
+#define __LPU0_HOSTFN2_MBOX0_INFO_SH 1
+#define __LPU0_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN2_MBOX0_INFO_SH)
+#define __LPU0_HOSTFN2_MBOX0_CMD_STATUS 0x00000001
+#define LPU1_HOSTFN2_MBOX0_CMD_STAT 0x0001915c
+#define __LPU1_HOSTFN2_MBOX0_INFO_MK 0xfffffffe
+#define __LPU1_HOSTFN2_MBOX0_INFO_SH 1
+#define __LPU1_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN2_MBOX0_INFO_SH)
+#define __LPU1_HOSTFN2_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN3_LPU0_MBOX0_CMD_STAT 0x00019160
+#define __HOSTFN3_LPU0_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN3_LPU0_MBOX0_INFO_SH 1
+#define __HOSTFN3_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU0_MBOX0_INFO_SH)
+#define __HOSTFN3_LPU0_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN3_LPU1_MBOX0_CMD_STAT 0x00019164
+#define __HOSTFN3_LPU1_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN3_LPU1_MBOX0_INFO_SH 1
+#define __HOSTFN3_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU1_MBOX0_INFO_SH)
+#define __HOSTFN3_LPU1_MBOX0_CMD_STATUS 0x00000001
+#define LPU0_HOSTFN3_MBOX0_CMD_STAT 0x00019168
+#define __LPU0_HOSTFN3_MBOX0_INFO_MK 0xfffffffe
+#define __LPU0_HOSTFN3_MBOX0_INFO_SH 1
+#define __LPU0_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN3_MBOX0_INFO_SH)
+#define __LPU0_HOSTFN3_MBOX0_CMD_STATUS 0x00000001
+#define LPU1_HOSTFN3_MBOX0_CMD_STAT 0x0001916c
+#define __LPU1_HOSTFN3_MBOX0_INFO_MK 0xfffffffe
+#define __LPU1_HOSTFN3_MBOX0_INFO_SH 1
+#define __LPU1_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN3_MBOX0_INFO_SH)
+#define __LPU1_HOSTFN3_MBOX0_CMD_STATUS 0x00000001
+#define FW_INIT_HALT_P0 0x000191ac
+#define __FW_INIT_HALT_P 0x00000001
+#define FW_INIT_HALT_P1 0x000191bc
+#define CPE_PI_PTR_Q0 0x00038000
+#define __CPE_PI_UNUSED_MK 0xffff0000
+#define __CPE_PI_UNUSED_SH 16
+#define __CPE_PI_UNUSED(_v) ((_v) << __CPE_PI_UNUSED_SH)
+#define __CPE_PI_PTR 0x0000ffff
+#define CPE_PI_PTR_Q1 0x00038040
+#define CPE_CI_PTR_Q0 0x00038004
+#define __CPE_CI_UNUSED_MK 0xffff0000
+#define __CPE_CI_UNUSED_SH 16
+#define __CPE_CI_UNUSED(_v) ((_v) << __CPE_CI_UNUSED_SH)
+#define __CPE_CI_PTR 0x0000ffff
+#define CPE_CI_PTR_Q1 0x00038044
+#define CPE_DEPTH_Q0 0x00038008
+#define __CPE_DEPTH_UNUSED_MK 0xf8000000
+#define __CPE_DEPTH_UNUSED_SH 27
+#define __CPE_DEPTH_UNUSED(_v) ((_v) << __CPE_DEPTH_UNUSED_SH)
+#define __CPE_MSIX_VEC_INDEX_MK 0x07ff0000
+#define __CPE_MSIX_VEC_INDEX_SH 16
+#define __CPE_MSIX_VEC_INDEX(_v) ((_v) << __CPE_MSIX_VEC_INDEX_SH)
+#define __CPE_DEPTH 0x0000ffff
+#define CPE_DEPTH_Q1 0x00038048
+#define CPE_QCTRL_Q0 0x0003800c
+#define __CPE_CTRL_UNUSED30_MK 0xfc000000
+#define __CPE_CTRL_UNUSED30_SH 26
+#define __CPE_CTRL_UNUSED30(_v) ((_v) << __CPE_CTRL_UNUSED30_SH)
+#define __CPE_FUNC_INT_CTRL_MK 0x03000000
+#define __CPE_FUNC_INT_CTRL_SH 24
+#define __CPE_FUNC_INT_CTRL(_v) ((_v) << __CPE_FUNC_INT_CTRL_SH)
+enum {
+ __CPE_FUNC_INT_CTRL_DISABLE = 0x0,
+ __CPE_FUNC_INT_CTRL_F2NF = 0x1,
+ __CPE_FUNC_INT_CTRL_3QUART = 0x2,
+ __CPE_FUNC_INT_CTRL_HALF = 0x3,
+};
+#define __CPE_CTRL_UNUSED20_MK 0x00f00000
+#define __CPE_CTRL_UNUSED20_SH 20
+#define __CPE_CTRL_UNUSED20(_v) ((_v) << __CPE_CTRL_UNUSED20_SH)
+#define __CPE_SCI_TH_MK 0x000f0000
+#define __CPE_SCI_TH_SH 16
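Each register field in this header follows the same _MK/_SH/(_v) pattern: the mask isolates the field's bits, the shift gives its bit position, and the function-like macro places a value into that position. As a minimal illustration of the read-modify-write idiom these macros enable (the helper below is hypothetical and not part of the patch; it only shows the macro arithmetic on a register value already read from hardware):

/*
 * Illustrative sketch only: extract and rewrite the LVL field of a
 * HOSTFN0_INT_STATUS value using the mask/shift macros defined above.
 */
static u32 bump_int_lvl(u32 reg)
{
	u32 lvl = (reg & __HOSTFN0_INT_STATUS_LVL_MK) >>
			__HOSTFN0_INT_STATUS_LVL_SH;	/* extract current level */

	reg &= ~__HOSTFN0_INT_STATUS_LVL_MK;		/* clear the old field */
	reg |= __HOSTFN0_INT_STATUS_LVL(lvl + 1);	/* insert the new value */
	return reg;
}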
+#define __CPE_SCI_TH(_v) ((_v) << __CPE_SCI_TH_SH) +#define __CPE_CTRL_UNUSED10_MK 0x0000c000 +#define __CPE_CTRL_UNUSED10_SH 14 +#define __CPE_CTRL_UNUSED10(_v) ((_v) << __CPE_CTRL_UNUSED10_SH) +#define __CPE_ACK_PENDING 0x00002000 +#define __CPE_CTRL_UNUSED40_MK 0x00001c00 +#define __CPE_CTRL_UNUSED40_SH 10 +#define __CPE_CTRL_UNUSED40(_v) ((_v) << __CPE_CTRL_UNUSED40_SH) +#define __CPE_PCIEID_MK 0x00000300 +#define __CPE_PCIEID_SH 8 +#define __CPE_PCIEID(_v) ((_v) << __CPE_PCIEID_SH) +#define __CPE_CTRL_UNUSED00_MK 0x000000fe +#define __CPE_CTRL_UNUSED00_SH 1 +#define __CPE_CTRL_UNUSED00(_v) ((_v) << __CPE_CTRL_UNUSED00_SH) +#define __CPE_ESIZE 0x00000001 +#define CPE_QCTRL_Q1 0x0003804c +#define __CPE_CTRL_UNUSED31_MK 0xfc000000 +#define __CPE_CTRL_UNUSED31_SH 26 +#define __CPE_CTRL_UNUSED31(_v) ((_v) << __CPE_CTRL_UNUSED31_SH) +#define __CPE_CTRL_UNUSED21_MK 0x00f00000 +#define __CPE_CTRL_UNUSED21_SH 20 +#define __CPE_CTRL_UNUSED21(_v) ((_v) << __CPE_CTRL_UNUSED21_SH) +#define __CPE_CTRL_UNUSED11_MK 0x0000c000 +#define __CPE_CTRL_UNUSED11_SH 14 +#define __CPE_CTRL_UNUSED11(_v) ((_v) << __CPE_CTRL_UNUSED11_SH) +#define __CPE_CTRL_UNUSED41_MK 0x00001c00 +#define __CPE_CTRL_UNUSED41_SH 10 +#define __CPE_CTRL_UNUSED41(_v) ((_v) << __CPE_CTRL_UNUSED41_SH) +#define __CPE_CTRL_UNUSED01_MK 0x000000fe +#define __CPE_CTRL_UNUSED01_SH 1 +#define __CPE_CTRL_UNUSED01(_v) ((_v) << __CPE_CTRL_UNUSED01_SH) +#define RME_PI_PTR_Q0 0x00038020 +#define __LATENCY_TIME_STAMP_MK 0xffff0000 +#define __LATENCY_TIME_STAMP_SH 16 +#define __LATENCY_TIME_STAMP(_v) ((_v) << __LATENCY_TIME_STAMP_SH) +#define __RME_PI_PTR 0x0000ffff +#define RME_PI_PTR_Q1 0x00038060 +#define RME_CI_PTR_Q0 0x00038024 +#define __DELAY_TIME_STAMP_MK 0xffff0000 +#define __DELAY_TIME_STAMP_SH 16 +#define __DELAY_TIME_STAMP(_v) ((_v) << __DELAY_TIME_STAMP_SH) +#define __RME_CI_PTR 0x0000ffff +#define RME_CI_PTR_Q1 0x00038064 +#define RME_DEPTH_Q0 0x00038028 +#define __RME_DEPTH_UNUSED_MK 0xf8000000 +#define __RME_DEPTH_UNUSED_SH 27 +#define __RME_DEPTH_UNUSED(_v) ((_v) << __RME_DEPTH_UNUSED_SH) +#define __RME_MSIX_VEC_INDEX_MK 0x07ff0000 +#define __RME_MSIX_VEC_INDEX_SH 16 +#define __RME_MSIX_VEC_INDEX(_v) ((_v) << __RME_MSIX_VEC_INDEX_SH) +#define __RME_DEPTH 0x0000ffff +#define RME_DEPTH_Q1 0x00038068 +#define RME_QCTRL_Q0 0x0003802c +#define __RME_INT_LATENCY_TIMER_MK 0xff000000 +#define __RME_INT_LATENCY_TIMER_SH 24 +#define __RME_INT_LATENCY_TIMER(_v) ((_v) << __RME_INT_LATENCY_TIMER_SH) +#define __RME_INT_DELAY_TIMER_MK 0x00ff0000 +#define __RME_INT_DELAY_TIMER_SH 16 +#define __RME_INT_DELAY_TIMER(_v) ((_v) << __RME_INT_DELAY_TIMER_SH) +#define __RME_INT_DELAY_DISABLE 0x00008000 +#define __RME_DLY_DELAY_DISABLE 0x00004000 +#define __RME_ACK_PENDING 0x00002000 +#define __RME_FULL_INTERRUPT_DISABLE 0x00001000 +#define __RME_CTRL_UNUSED10_MK 0x00000c00 +#define __RME_CTRL_UNUSED10_SH 10 +#define __RME_CTRL_UNUSED10(_v) ((_v) << __RME_CTRL_UNUSED10_SH) +#define __RME_PCIEID_MK 0x00000300 +#define __RME_PCIEID_SH 8 +#define __RME_PCIEID(_v) ((_v) << __RME_PCIEID_SH) +#define __RME_CTRL_UNUSED00_MK 0x000000fe +#define __RME_CTRL_UNUSED00_SH 1 +#define __RME_CTRL_UNUSED00(_v) ((_v) << __RME_CTRL_UNUSED00_SH) +#define __RME_ESIZE 0x00000001 +#define RME_QCTRL_Q1 0x0003806c +#define __RME_CTRL_UNUSED11_MK 0x00000c00 +#define __RME_CTRL_UNUSED11_SH 10 +#define __RME_CTRL_UNUSED11(_v) ((_v) << __RME_CTRL_UNUSED11_SH) +#define __RME_CTRL_UNUSED01_MK 0x000000fe +#define __RME_CTRL_UNUSED01_SH 1 +#define __RME_CTRL_UNUSED01(_v) ((_v) << 
__RME_CTRL_UNUSED01_SH) +#define PSS_CTL_REG 0x00018800 +#define __PSS_I2C_CLK_DIV_MK 0x007f0000 +#define __PSS_I2C_CLK_DIV_SH 16 +#define __PSS_I2C_CLK_DIV(_v) ((_v) << __PSS_I2C_CLK_DIV_SH) +#define __PSS_LMEM_INIT_DONE 0x00001000 +#define __PSS_LMEM_RESET 0x00000200 +#define __PSS_LMEM_INIT_EN 0x00000100 +#define __PSS_LPU1_RESET 0x00000002 +#define __PSS_LPU0_RESET 0x00000001 +#define PSS_ERR_STATUS_REG 0x00018810 +#define __PSS_LPU1_TCM_READ_ERR 0x00200000 +#define __PSS_LPU0_TCM_READ_ERR 0x00100000 +#define __PSS_LMEM5_CORR_ERR 0x00080000 +#define __PSS_LMEM4_CORR_ERR 0x00040000 +#define __PSS_LMEM3_CORR_ERR 0x00020000 +#define __PSS_LMEM2_CORR_ERR 0x00010000 +#define __PSS_LMEM1_CORR_ERR 0x00008000 +#define __PSS_LMEM0_CORR_ERR 0x00004000 +#define __PSS_LMEM5_UNCORR_ERR 0x00002000 +#define __PSS_LMEM4_UNCORR_ERR 0x00001000 +#define __PSS_LMEM3_UNCORR_ERR 0x00000800 +#define __PSS_LMEM2_UNCORR_ERR 0x00000400 +#define __PSS_LMEM1_UNCORR_ERR 0x00000200 +#define __PSS_LMEM0_UNCORR_ERR 0x00000100 +#define __PSS_BAL_PERR 0x00000080 +#define __PSS_DIP_IF_ERR 0x00000040 +#define __PSS_IOH_IF_ERR 0x00000020 +#define __PSS_TDS_IF_ERR 0x00000010 +#define __PSS_RDS_IF_ERR 0x00000008 +#define __PSS_SGM_IF_ERR 0x00000004 +#define __PSS_LPU1_RAM_ERR 0x00000002 +#define __PSS_LPU0_RAM_ERR 0x00000001 +#define ERR_SET_REG 0x00018818 +#define __PSS_ERR_STATUS_SET 0x003fffff +#define PMM_1T_RESET_REG_P0 0x0002381c +#define __PMM_1T_RESET_P 0x00000001 +#define PMM_1T_RESET_REG_P1 0x00023c1c +#define HQM_QSET0_RXQ_DRBL_P0 0x00038000 +#define __RXQ0_ADD_VECTORS_P 0x80000000 +#define __RXQ0_STOP_P 0x40000000 +#define __RXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_RXQ_DRBL_P0 0x00038080 +#define __RXQ1_ADD_VECTORS_P 0x80000000 +#define __RXQ1_STOP_P 0x40000000 +#define __RXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_RXQ_DRBL_P1 0x0003c000 +#define HQM_QSET1_RXQ_DRBL_P1 0x0003c080 +#define HQM_QSET0_TXQ_DRBL_P0 0x00038020 +#define __TXQ0_ADD_VECTORS_P 0x80000000 +#define __TXQ0_STOP_P 0x40000000 +#define __TXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_TXQ_DRBL_P0 0x000380a0 +#define __TXQ1_ADD_VECTORS_P 0x80000000 +#define __TXQ1_STOP_P 0x40000000 +#define __TXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_TXQ_DRBL_P1 0x0003c020 +#define HQM_QSET1_TXQ_DRBL_P1 0x0003c0a0 +#define HQM_QSET0_IB_DRBL_1_P0 0x00038040 +#define __IB1_0_ACK_P 0x80000000 +#define __IB1_0_DISABLE_P 0x40000000 +#define __IB1_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_0_COALESCING_CFG_P_SH 16 +#define __IB1_0_COALESCING_CFG_P(_v) ((_v) << __IB1_0_COALESCING_CFG_P_SH) +#define __IB1_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_1_P0 0x000380c0 +#define __IB1_1_ACK_P 0x80000000 +#define __IB1_1_DISABLE_P 0x40000000 +#define __IB1_1_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_1_COALESCING_CFG_P_SH 16 +#define __IB1_1_COALESCING_CFG_P(_v) ((_v) << __IB1_1_COALESCING_CFG_P_SH) +#define __IB1_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET0_IB_DRBL_1_P1 0x0003c040 +#define HQM_QSET1_IB_DRBL_1_P1 0x0003c0c0 +#define HQM_QSET0_IB_DRBL_2_P0 0x00038060 +#define __IB2_0_ACK_P 0x80000000 +#define __IB2_0_DISABLE_P 0x40000000 +#define __IB2_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB2_0_COALESCING_CFG_P_SH 16 +#define __IB2_0_COALESCING_CFG_P(_v) ((_v) << __IB2_0_COALESCING_CFG_P_SH) +#define __IB2_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_2_P0 0x000380e0 +#define __IB2_1_ACK_P 0x80000000 +#define __IB2_1_DISABLE_P 0x40000000 +#define __IB2_1_COALESCING_CFG_P_MK 0x00ff0000 +#define 
__IB2_1_COALESCING_CFG_P_SH 16
+#define __IB2_1_COALESCING_CFG_P(_v) ((_v) << __IB2_1_COALESCING_CFG_P_SH)
+#define __IB2_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff
+#define HQM_QSET0_IB_DRBL_2_P1 0x0003c060
+#define HQM_QSET1_IB_DRBL_2_P1 0x0003c0e0
+
+/*
+ * These definitions are either incorrect or missing in the spec. They are
+ * auto-generated from hard-coded values in regparse.pl.
+ */
+#define __EMPHPOST_AT_4G_MK_FIX 0x0000001c
+#define __EMPHPOST_AT_4G_SH_FIX 0x00000002
+#define __EMPHPRE_AT_4G_FIX 0x00000003
+#define __SFP_TXRATE_EN_FIX 0x00000100
+#define __SFP_RXRATE_EN_FIX 0x00000080
+
+/*
+ * These register definitions are auto-generated from hard-coded values
+ * in regparse.pl.
+ */
+
+/*
+ * These register mapping definitions are auto-generated from mapping tables
+ * in regparse.pl.
+ */
+#define BFA_IOC0_HBEAT_REG HOST_SEM0_INFO_REG
+#define BFA_IOC0_STATE_REG HOST_SEM1_INFO_REG
+#define BFA_IOC1_HBEAT_REG HOST_SEM2_INFO_REG
+#define BFA_IOC1_STATE_REG HOST_SEM3_INFO_REG
+#define BFA_FW_USE_COUNT HOST_SEM4_INFO_REG
+
+#define CPE_DEPTH_Q(__n) \
+ (CPE_DEPTH_Q0 + (__n) * (CPE_DEPTH_Q1 - CPE_DEPTH_Q0))
+#define CPE_QCTRL_Q(__n) \
+ (CPE_QCTRL_Q0 + (__n) * (CPE_QCTRL_Q1 - CPE_QCTRL_Q0))
+#define CPE_PI_PTR_Q(__n) \
+ (CPE_PI_PTR_Q0 + (__n) * (CPE_PI_PTR_Q1 - CPE_PI_PTR_Q0))
+#define CPE_CI_PTR_Q(__n) \
+ (CPE_CI_PTR_Q0 + (__n) * (CPE_CI_PTR_Q1 - CPE_CI_PTR_Q0))
+#define RME_DEPTH_Q(__n) \
+ (RME_DEPTH_Q0 + (__n) * (RME_DEPTH_Q1 - RME_DEPTH_Q0))
+#define RME_QCTRL_Q(__n) \
+ (RME_QCTRL_Q0 + (__n) * (RME_QCTRL_Q1 - RME_QCTRL_Q0))
+#define RME_PI_PTR_Q(__n) \
+ (RME_PI_PTR_Q0 + (__n) * (RME_PI_PTR_Q1 - RME_PI_PTR_Q0))
+#define RME_CI_PTR_Q(__n) \
+ (RME_CI_PTR_Q0 + (__n) * (RME_CI_PTR_Q1 - RME_CI_PTR_Q0))
+#define HQM_QSET_RXQ_DRBL_P0(__n) (HQM_QSET0_RXQ_DRBL_P0 + (__n) \
+ * (HQM_QSET1_RXQ_DRBL_P0 - HQM_QSET0_RXQ_DRBL_P0))
+#define HQM_QSET_TXQ_DRBL_P0(__n) (HQM_QSET0_TXQ_DRBL_P0 + (__n) \
+ * (HQM_QSET1_TXQ_DRBL_P0 - HQM_QSET0_TXQ_DRBL_P0))
+#define HQM_QSET_IB_DRBL_1_P0(__n) (HQM_QSET0_IB_DRBL_1_P0 + (__n) \
+ * (HQM_QSET1_IB_DRBL_1_P0 - HQM_QSET0_IB_DRBL_1_P0))
+#define HQM_QSET_IB_DRBL_2_P0(__n) (HQM_QSET0_IB_DRBL_2_P0 + (__n) \
+ * (HQM_QSET1_IB_DRBL_2_P0 - HQM_QSET0_IB_DRBL_2_P0))
+#define HQM_QSET_RXQ_DRBL_P1(__n) (HQM_QSET0_RXQ_DRBL_P1 + (__n) \
+ * (HQM_QSET1_RXQ_DRBL_P1 - HQM_QSET0_RXQ_DRBL_P1))
+#define HQM_QSET_TXQ_DRBL_P1(__n) (HQM_QSET0_TXQ_DRBL_P1 + (__n) \
+ * (HQM_QSET1_TXQ_DRBL_P1 - HQM_QSET0_TXQ_DRBL_P1))
+#define HQM_QSET_IB_DRBL_1_P1(__n) (HQM_QSET0_IB_DRBL_1_P1 + (__n) \
+ * (HQM_QSET1_IB_DRBL_1_P1 - HQM_QSET0_IB_DRBL_1_P1))
+#define HQM_QSET_IB_DRBL_2_P1(__n) (HQM_QSET0_IB_DRBL_2_P1 + (__n) \
+ * (HQM_QSET1_IB_DRBL_2_P1 - HQM_QSET0_IB_DRBL_2_P1))
+
+#define CPE_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))
+#define RME_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))
+#define CPE_Q_MASK(__q) ((__q) & 0x3)
+#define RME_Q_MASK(__q) ((__q) & 0x3)
+
+/*
+ * PCI MSI-X vector defines
+ */
+enum {
+ BFA_MSIX_CPE_Q0 = 0,
+ BFA_MSIX_CPE_Q1 = 1,
+ BFA_MSIX_CPE_Q2 = 2,
+ BFA_MSIX_CPE_Q3 = 3,
+ BFA_MSIX_RME_Q0 = 4,
+ BFA_MSIX_RME_Q1 = 5,
+ BFA_MSIX_RME_Q2 = 6,
+ BFA_MSIX_RME_Q3 = 7,
+ BFA_MSIX_LPU_ERR = 8,
+ BFA_MSIX_CT_MAX = 9,
+};
+
+/*
+ * And corresponding host interrupt status bit field defines
+ */
+#define __HFN_INT_CPE_Q0 0x00000001U
+#define __HFN_INT_CPE_Q1 0x00000002U
+#define __HFN_INT_CPE_Q2 0x00000004U
+#define __HFN_INT_CPE_Q3 0x00000008U
+#define __HFN_INT_CPE_Q4 0x00000010U
+#define __HFN_INT_CPE_Q5 0x00000020U
+#define __HFN_INT_CPE_Q6 0x00000040U
+#define __HFN_INT_CPE_Q7 0x00000080U
+#define __HFN_INT_RME_Q0 0x00000100U
+#define __HFN_INT_RME_Q1 0x00000200U
+#define __HFN_INT_RME_Q2 0x00000400U
+#define __HFN_INT_RME_Q3 0x00000800U
+#define __HFN_INT_RME_Q4 0x00001000U
+#define __HFN_INT_RME_Q5 0x00002000U
+#define __HFN_INT_RME_Q6 0x00004000U
+#define __HFN_INT_RME_Q7 0x00008000U
+#define __HFN_INT_ERR_EMC 0x00010000U
+#define __HFN_INT_ERR_LPU0 0x00020000U
+#define __HFN_INT_ERR_LPU1 0x00040000U
+#define __HFN_INT_ERR_PSS 0x00080000U
+#define __HFN_INT_MBOX_LPU0 0x00100000U
+#define __HFN_INT_MBOX_LPU1 0x00200000U
+#define __HFN_INT_MBOX1_LPU0 0x00400000U
+#define __HFN_INT_MBOX1_LPU1 0x00800000U
+#define __HFN_INT_LL_HALT 0x01000000U
+#define __HFN_INT_CPE_MASK 0x000000ffU
+#define __HFN_INT_RME_MASK 0x0000ff00U
+
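The _Q(__n) macros above compute a queue's register address from the Q0 base plus n times the Q1-Q0 stride, and the BFA_MSIX_* vectors and __HFN_INT_* status bits follow the same queue numbering. A sketch of how the three line up (the demux helper is hypothetical; only the address and vector arithmetic is shown, with no hardware access):

/*
 * Hypothetical demux sketch: for each pending CPE completion queue,
 * derive its depth-register offset and its MSI-X vector.
 */
static void demo_cpe_demux(u32 int_status)
{
	u32 pending = int_status & __HFN_INT_CPE_MASK;
	u32 depth_off;
	int vector;
	int q;

	for (q = 0; q < 4; q++) {
		if (!(pending & (1U << q)))
			continue;
		depth_off = CPE_DEPTH_Q(q);	/* stride CPE_DEPTH_Q1 - CPE_DEPTH_Q0 == 0x40 */
		vector = BFA_MSIX_CPE_Q0 + q;	/* CPE queue n -> MSI-X vector n */
		(void)depth_off;		/* a real handler would readl()/writel() here */
		(void)vector;
	}
}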
+/*
+ * catapult memory map.
+ */
+#define LL_PGN_HQM0 0x0096
+#define LL_PGN_HQM1 0x0097
+#define PSS_SMEM_PAGE_START 0x8000
+#define PSS_SMEM_PGNUM(_pg0, _ma) ((_pg0) + ((_ma) >> 15))
+#define PSS_SMEM_PGOFF(_ma) ((_ma) & 0x7fff)
+
+/*
+ * End of catapult memory map
+ */
+
+#endif /* __BFI_CTREG_H__ */
diff --git a/drivers/net/bna/bfi_ll.h b/drivers/net/bna/bfi_ll.h
new file mode 100644
index 000000000000..bee4d054066a
--- /dev/null
+++ b/drivers/net/bna/bfi_ll.h
@@ -0,0 +1,438 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ */
+#ifndef __BFI_LL_H__
+#define __BFI_LL_H__
+
+#include "bfi.h"
+
+#pragma pack(1)
+
+/**
+ * @brief
+ * "enums" for all LL mailbox messages other than IOC
+ */
+enum {
+ BFI_LL_H2I_MAC_UCAST_SET_REQ = 1,
+ BFI_LL_H2I_MAC_UCAST_ADD_REQ = 2,
+ BFI_LL_H2I_MAC_UCAST_DEL_REQ = 3,
+
+ BFI_LL_H2I_MAC_MCAST_ADD_REQ = 4,
+ BFI_LL_H2I_MAC_MCAST_DEL_REQ = 5,
+ BFI_LL_H2I_MAC_MCAST_FILTER_REQ = 6,
+ BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ = 7,
+
+ BFI_LL_H2I_PORT_ADMIN_REQ = 8,
+ BFI_LL_H2I_STATS_GET_REQ = 9,
+ BFI_LL_H2I_STATS_CLEAR_REQ = 10,
+
+ BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ = 11,
+ BFI_LL_H2I_RXF_DEFAULT_SET_REQ = 12,
+
+ BFI_LL_H2I_TXQ_STOP_REQ = 13,
+ BFI_LL_H2I_RXQ_STOP_REQ = 14,
+
+ BFI_LL_H2I_DIAG_LOOPBACK_REQ = 15,
+
+ BFI_LL_H2I_SET_PAUSE_REQ = 16,
+ BFI_LL_H2I_MTU_INFO_REQ = 17,
+
+ BFI_LL_H2I_RX_REQ = 18,
+};
+
+enum {
+ BFI_LL_I2H_MAC_UCAST_SET_RSP = BFA_I2HM(1),
+ BFI_LL_I2H_MAC_UCAST_ADD_RSP = BFA_I2HM(2),
+ BFI_LL_I2H_MAC_UCAST_DEL_RSP = BFA_I2HM(3),
+
+ BFI_LL_I2H_MAC_MCAST_ADD_RSP = BFA_I2HM(4),
+ BFI_LL_I2H_MAC_MCAST_DEL_RSP = BFA_I2HM(5),
+ BFI_LL_I2H_MAC_MCAST_FILTER_RSP = BFA_I2HM(6),
+ BFI_LL_I2H_MAC_MCAST_DEL_ALL_RSP = BFA_I2HM(7),
+
+ BFI_LL_I2H_PORT_ADMIN_RSP = BFA_I2HM(8),
+ BFI_LL_I2H_STATS_GET_RSP = BFA_I2HM(9),
+ BFI_LL_I2H_STATS_CLEAR_RSP = BFA_I2HM(10),
+
+ BFI_LL_I2H_RXF_PROMISCUOUS_SET_RSP = BFA_I2HM(11),
+ BFI_LL_I2H_RXF_DEFAULT_SET_RSP = BFA_I2HM(12),
+
+ BFI_LL_I2H_TXQ_STOP_RSP = BFA_I2HM(13),
+ BFI_LL_I2H_RXQ_STOP_RSP = BFA_I2HM(14),
+
+ BFI_LL_I2H_DIAG_LOOPBACK_RSP = BFA_I2HM(15),
+
+ BFI_LL_I2H_SET_PAUSE_RSP = BFA_I2HM(16),
+
+ BFI_LL_I2H_MTU_INFO_RSP = BFA_I2HM(17),
+ BFI_LL_I2H_RX_RSP = BFA_I2HM(18),
+
+ BFI_LL_I2H_LINK_DOWN_AEN = BFA_I2HM(19),
+ BFI_LL_I2H_LINK_UP_AEN = BFA_I2HM(20),
+
+ BFI_LL_I2H_PORT_ENABLE_AEN = BFA_I2HM(21),
+ BFI_LL_I2H_PORT_DISABLE_AEN = BFA_I2HM(22),
+};
+
+/**
+ * @brief bfi_ll_mac_addr_req is used by:
+ * BFI_LL_H2I_MAC_UCAST_SET_REQ
+ * BFI_LL_H2I_MAC_UCAST_ADD_REQ
+ * BFI_LL_H2I_MAC_UCAST_DEL_REQ
+ * BFI_LL_H2I_MAC_MCAST_ADD_REQ
+ * BFI_LL_H2I_MAC_MCAST_DEL_REQ
+ */
+struct bfi_ll_mac_addr_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 rsvd1[3];
+ mac_t mac_addr;
+ u8 rsvd2[2];
+};
+
+/**
+ * @brief bfi_ll_mcast_filter_req is used by:
+ * BFI_LL_H2I_MAC_MCAST_FILTER_REQ
+ */
+struct bfi_ll_mcast_filter_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 enable;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_mcast_del_all_req is used by:
+ * BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ
+ */
+struct bfi_ll_mcast_del_all_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_q_stop_req is used by:
+ * BFI_LL_H2I_TXQ_STOP_REQ
+ * BFI_LL_H2I_RXQ_STOP_REQ
+ */
+struct bfi_ll_q_stop_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 q_id_mask[2]; /* !< bit-mask for queue ids */
+};
+
+/**
+ * @brief bfi_ll_stats_req is used by:
+ * BFI_LL_H2I_STATS_GET_REQ
+ * BFI_LL_H2I_STATS_CLEAR_REQ
+ */
+struct bfi_ll_stats_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u16 stats_mask; /* !< bit-mask for non-function statistics */
+ u8 rsvd[2];
+ u32 rxf_id_mask[2]; /* !< bit-mask for RxF Statistics */
+ u32 txf_id_mask[2]; /* !< bit-mask for TxF Statistics */
+ union bfi_addr_u host_buffer; /* !< where statistics are returned */
+};
+
+/**
+ * @brief defines for "stats_mask" above.
+ */
+#define BFI_LL_STATS_MAC (1 << 0) /* !< MAC Statistics */
+#define BFI_LL_STATS_BPC (1 << 1) /* !< Pause Stats from BPC */
+#define BFI_LL_STATS_RAD (1 << 2) /* !< Rx Admission Statistics */
+#define BFI_LL_STATS_RX_FC (1 << 3) /* !< Rx FC Stats from RxA */
+#define BFI_LL_STATS_TX_FC (1 << 4) /* !< Tx FC Stats from TxA */
+
+#define BFI_LL_STATS_ALL 0x1f
+
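The stats_mask bits select which of the non-function statistics blocks the firmware gathers into host_buffer. A sketch of filling such a request (the helper is hypothetical, the DMA address is assumed to be supplied by the caller, and any byte-order conversion the firmware interface may require is omitted here):

/*
 * Illustrative sketch only: request MAC and BPC pause statistics for
 * RxF 0. `buf_addr' is assumed to already hold the DMA address of the
 * host-side statistics buffer.
 */
static void demo_fill_stats_req(struct bfi_ll_stats_req *req,
				union bfi_addr_u buf_addr)
{
	memset(req, 0, sizeof(*req));
	req->mh.msg_class = BFI_MC_LL;
	req->mh.msg_id = BFI_LL_H2I_STATS_GET_REQ;
	req->stats_mask = BFI_LL_STATS_MAC | BFI_LL_STATS_BPC;
	req->rxf_id_mask[0] = 1 << 0;	/* RxF 0 only */
	req->host_buffer = buf_addr;
}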
+/**
+ * @brief bfi_ll_port_admin_req
+ */
+struct bfi_ll_port_admin_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 up;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_rxf_req is used by:
+ * BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ
+ * BFI_LL_H2I_RXF_DEFAULT_SET_REQ
+ */
+struct bfi_ll_rxf_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 enable;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_rxf_multi_req is used by:
+ * BFI_LL_H2I_RX_REQ
+ */
+struct bfi_ll_rxf_multi_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 rxf_id_mask[2];
+ u8 enable;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief enum for Loopback opmodes
+ */
+enum {
+ BFI_LL_DIAG_LB_OPMODE_EXT = 0,
+ BFI_LL_DIAG_LB_OPMODE_CBL = 1,
+};
+
+/**
+ * @brief bfi_ll_set_pause_req is used by:
+ * BFI_LL_H2I_SET_PAUSE_REQ
+ */
+struct bfi_ll_set_pause_req {
+ struct bfi_mhdr mh;
+ u8 tx_pause; /* 1 = enable, 0 = disable */
+ u8 rx_pause; /* 1 = enable, 0 = disable */
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_mtu_info_req is used by:
+ * BFI_LL_H2I_MTU_INFO_REQ
+ */
+struct bfi_ll_mtu_info_req {
+ struct bfi_mhdr mh;
+ u16 mtu;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief
+ * Response header format used by all responses
+ * For both responses and asynchronous notifications
+ */
+struct bfi_ll_rsp {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 error;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_aen is used by:
+ * BFI_LL_I2H_LINK_DOWN_AEN
+ * BFI_LL_I2H_LINK_UP_AEN
+ */
+struct bfi_ll_aen {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 reason;
+ u8 cee_linkup;
+ u8 prio_map; /*!< LL priority bit-map */
+ u8 rsvd[2];
+};
+
+/**
+ * @brief
+ * The following error codes can be returned
+ * by the mbox commands
+ */
+enum {
+ BFI_LL_CMD_OK = 0,
+ BFI_LL_CMD_FAIL = 1,
+ BFI_LL_CMD_DUP_ENTRY = 2, /* !< Duplicate entry in CAM */
+ BFI_LL_CMD_CAM_FULL = 3, /* !< CAM is full */
+ BFI_LL_CMD_NOT_OWNER = 4, /* !< Not permitted, because not owner */
+ BFI_LL_CMD_NOT_EXEC = 5, /* !< Was not sent to f/w at all */
+ BFI_LL_CMD_WAITING = 6, /* !< Waiting for completion (VMware) */
+ BFI_LL_CMD_PORT_DISABLED = 7, /* !< port in disabled state */
+};
+
+/* Statistics */
+#define BFI_LL_TXF_ID_MAX 64
+#define BFI_LL_RXF_ID_MAX 64
+
+/* TxF Frame Statistics */
+struct bfi_ll_stats_txf {
+ u64 ucast_octets;
+ u64 ucast;
+ u64 ucast_vlan;
+
+ u64 mcast_octets;
+ u64 mcast;
+ u64 mcast_vlan;
+
+ u64 bcast_octets;
+ u64 bcast;
+ u64 bcast_vlan;
+
+ u64 errors;
+ u64 filter_vlan; /* frames filtered due to VLAN */
+ u64 filter_mac_sa; /* frames filtered due to SA check */
+};
+
+/* RxF Frame Statistics */
+struct bfi_ll_stats_rxf {
+ u64 ucast_octets;
+ u64 ucast;
+ u64 ucast_vlan;
+
+ u64 mcast_octets;
+ u64 mcast;
+ u64 mcast_vlan;
+
+ u64 bcast_octets;
+ u64 bcast;
+ u64 bcast_vlan;
+ u64 frame_drops;
+};
+
+/* FC Tx Frame Statistics */
+struct bfi_ll_stats_fc_tx {
+ u64 txf_ucast_octets;
+ u64 txf_ucast;
+ u64 txf_ucast_vlan;
+
+ u64 txf_mcast_octets;
+ u64 txf_mcast;
+ u64 txf_mcast_vlan;
+
+ u64 txf_bcast_octets;
+ u64 txf_bcast;
+ u64 txf_bcast_vlan;
+
+ u64 txf_parity_errors;
+ u64 txf_timeout;
+ u64 txf_fid_parity_errors;
+};
+
+/* FC Rx Frame Statistics */
+struct 
bfi_ll_stats_fc_rx { + u64 rxf_ucast_octets; + u64 rxf_ucast; + u64 rxf_ucast_vlan; + + u64 rxf_mcast_octets; + u64 rxf_mcast; + u64 rxf_mcast_vlan; + + u64 rxf_bcast_octets; + u64 rxf_bcast; + u64 rxf_bcast_vlan; +}; + +/* RAD Frame Statistics */ +struct bfi_ll_stats_rad { + u64 rx_frames; + u64 rx_octets; + u64 rx_vlan_frames; + + u64 rx_ucast; + u64 rx_ucast_octets; + u64 rx_ucast_vlan; + + u64 rx_mcast; + u64 rx_mcast_octets; + u64 rx_mcast_vlan; + + u64 rx_bcast; + u64 rx_bcast_octets; + u64 rx_bcast_vlan; + + u64 rx_drops; +}; + +/* BPC Tx Registers */ +struct bfi_ll_stats_bpc { + /* transmit stats */ + u64 tx_pause[8]; + u64 tx_zero_pause[8]; /*!< Pause cancellation */ + /*!timer_mod)) + +/* MBOX API for PORT, TX, RX */ +#define bna_mbox_qe_fill(_qe, _cmd, _cmd_len, _cbfn, _cbarg) \ +do { \ + memcpy(&((_qe)->cmd.msg[0]), (_cmd), (_cmd_len)); \ + (_qe)->cbfn = (_cbfn); \ + (_qe)->cbarg = (_cbarg); \ +} while (0) + +#define bna_is_small_rxq(rcb) ((rcb)->id == 1) + +#define BNA_MAC_IS_EQUAL(_mac1, _mac2) \ + (!memcmp((_mac1), (_mac2), sizeof(mac_t))) + +#define BNA_POWER_OF_2(x) (((x) & ((x) - 1)) == 0) + +#define BNA_TO_POWER_OF_2(x) \ +do { \ + int _shift = 0; \ + while ((x) && (x) != 1) { \ + (x) >>= 1; \ + _shift++; \ + } \ + (x) <<= _shift; \ +} while (0) + +#define BNA_TO_POWER_OF_2_HIGH(x) \ +do { \ + int n = 1; \ + while (n < (x)) \ + n <<= 1; \ + (x) = n; \ +} while (0) + +/* + * input : _addr-> os dma addr in host endian format, + * output : _bna_dma_addr-> pointer to hw dma addr + */ +#define BNA_SET_DMA_ADDR(_addr, _bna_dma_addr) \ +do { \ + u64 tmp_addr = \ + cpu_to_be64((u64)(_addr)); \ + (_bna_dma_addr)->msb = ((struct bna_dma_addr *)&tmp_addr)->msb; \ + (_bna_dma_addr)->lsb = ((struct bna_dma_addr *)&tmp_addr)->lsb; \ +} while (0) + +/* + * input : _bna_dma_addr-> pointer to hw dma addr + * output : _addr-> os dma addr in host endian format + */ +#define BNA_GET_DMA_ADDR(_bna_dma_addr, _addr) \ +do { \ + (_addr) = ((((u64)ntohl((_bna_dma_addr)->msb))) << 32) \ + | ((ntohl((_bna_dma_addr)->lsb) & 0xffffffff)); \ +} while (0) + +#define containing_rec(addr, type, field) \ + ((type *)((unsigned char *)(addr) - \ + (unsigned char *)(&((type *)0)->field))) + +#define BNA_TXQ_WI_NEEDED(_vectors) (((_vectors) + 3) >> 2) + +/* TxQ element is 64 bytes */ +#define BNA_TXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 6) +#define BNA_TXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 6) + +#define BNA_TXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_TXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_TXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> BNA_TXQ_PAGE_INDEX_MAX_SHIFT)];\ + (_qe_ptr) = &((struct bna_txq_entry *)(page_addr))[page_index]; \ +} + +/* RxQ element is 8 bytes */ +#define BNA_RXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 3) +#define BNA_RXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 3) + +#define BNA_RXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_RXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_RXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_RXQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_rxq_entry *)(page_addr))[page_index]; \ +} + +/* CQ element is 16 bytes */ +#define BNA_CQ_PAGE_INDEX_MAX (PAGE_SIZE >> 4) +#define BNA_CQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 4) + +#define 
BNA_CQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + \ + page_index = (_qe_idx) & (BNA_CQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_CQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_CQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_cq_entry *)(page_addr))[page_index];\ +} + +#define BNA_QE_INDX_2_PTR(_cast, _qe_idx, _q_base) \ + (&((_cast *)(_q_base))[(_qe_idx)]) + +#define BNA_QE_INDX_RANGE(_qe_idx, _q_depth) ((_q_depth) - (_qe_idx)) + +#define BNA_QE_INDX_ADD(_qe_idx, _qe_num, _q_depth) \ + ((_qe_idx) = ((_qe_idx) + (_qe_num)) & ((_q_depth) - 1)) + +#define BNA_Q_INDEX_CHANGE(_old_idx, _updated_idx, _q_depth) \ + (((_updated_idx) - (_old_idx)) & ((_q_depth) - 1)) + +#define BNA_QE_FREE_CNT(_q_ptr, _q_depth) \ + (((_q_ptr)->consumer_index - (_q_ptr)->producer_index - 1) & \ + ((_q_depth) - 1)) + +#define BNA_QE_IN_USE_CNT(_q_ptr, _q_depth) \ + ((((_q_ptr)->producer_index - (_q_ptr)->consumer_index)) & \ + (_q_depth - 1)) + +#define BNA_Q_GET_CI(_q_ptr) ((_q_ptr)->q.consumer_index) + +#define BNA_Q_GET_PI(_q_ptr) ((_q_ptr)->q.producer_index) + +#define BNA_Q_PI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.producer_index = \ + (((_q_ptr)->q.producer_index + (_num)) & \ + ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_CI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.consumer_index = \ + (((_q_ptr)->q.consumer_index + (_num)) \ + & ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_FREE_COUNT(_q_ptr) \ + (BNA_QE_FREE_CNT(&((_q_ptr)->q), (_q_ptr)->q.q_depth)) + +#define BNA_Q_IN_USE_COUNT(_q_ptr) \ + (BNA_QE_IN_USE_CNT(&(_q_ptr)->q, (_q_ptr)->q.q_depth)) + +/* These macros build the data portion of the TxQ/RxQ doorbell */ +#define BNA_DOORBELL_Q_PRD_IDX(_pi) (0x80000000 | (_pi)) +#define BNA_DOORBELL_Q_STOP (0x40000000) + +/* These macros build the data portion of the IB doorbell */ +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* Set the coalescing timer for the given ib */ +#define bna_ib_coalescing_timer_set(_i_dbell, _cls_timer) \ + ((_i_dbell)->doorbell_ack = BNA_DOORBELL_IB_INT_ACK((_cls_timer), 0)); + +/* Acks 'events' # of events for a given ib */ +#define bna_ib_ack(_i_dbell, _events) \ + (writel(((_i_dbell)->doorbell_ack | (_events)), \ + (_i_dbell)->doorbell_addr)); + +#define bna_txq_prod_indx_doorbell(_tcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_tcb)->producer_index), \ + (_tcb)->q_dbell)); + +#define bna_rxq_prod_indx_doorbell(_rcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_rcb)->producer_index), \ + (_rcb)->q_dbell)); + +#define BNA_LARGE_PKT_SIZE 1000 + +#define BNA_UPDATE_PKT_CNT(_pkt, _len) \ +do { \ + if ((_len) > BNA_LARGE_PKT_SIZE) { \ + (_pkt)->large_pkt_cnt++; \ + } else { \ + (_pkt)->small_pkt_cnt++; \ + } \ +} while (0) + +#define call_rxf_stop_cbfn(rxf, status) \ + if ((rxf)->stop_cbfn) { \ + (*(rxf)->stop_cbfn)((rxf)->stop_cbarg, (status)); \ + (rxf)->stop_cbfn = NULL; \ + (rxf)->stop_cbarg = NULL; \ + } + +#define call_rxf_start_cbfn(rxf, status) \ + if ((rxf)->start_cbfn) { \ + (*(rxf)->start_cbfn)((rxf)->start_cbarg, (status)); \ + (rxf)->start_cbfn = NULL; \ + (rxf)->start_cbarg = NULL; \ + } + +#define call_rxf_cam_fltr_cbfn(rxf, status) \ + if ((rxf)->cam_fltr_cbfn) { \ + (*(rxf)->cam_fltr_cbfn)((rxf)->cam_fltr_cbarg, rxf->rx, \ + (status)); \ + (rxf)->cam_fltr_cbfn = NULL; \ + (rxf)->cam_fltr_cbarg = NULL; \ + } + +#define call_rxf_pause_cbfn(rxf, status) 
\ + if ((rxf)->oper_state_cbfn) { \ + (*(rxf)->oper_state_cbfn)((rxf)->oper_state_cbarg, rxf->rx,\ + (status)); \ + (rxf)->rxf_flags &= ~BNA_RXF_FL_OPERSTATE_CHANGED; \ + (rxf)->oper_state_cbfn = NULL; \ + (rxf)->oper_state_cbarg = NULL; \ + } + +#define call_rxf_resume_cbfn(rxf, status) call_rxf_pause_cbfn(rxf, status) + +#define is_xxx_enable(mode, bitmask, xxx) ((bitmask & xxx) && (mode & xxx)) + +#define is_xxx_disable(mode, bitmask, xxx) ((bitmask & xxx) && !(mode & xxx)) + +#define xxx_enable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode |= xxx; \ +} while (0) + +#define xxx_disable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode &= ~xxx; \ +} while (0) + +#define xxx_inactive(mode, bitmask, xxx) \ +do { \ + bitmask &= ~xxx; \ + mode &= ~xxx; \ +} while (0) + +#define is_promisc_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_promisc_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_default_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_default_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_allmulti_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define is_allmulti_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define GET_RXQS(rxp, q0, q1) do { \ + switch ((rxp)->type) { \ + case BNA_RXP_SINGLE: \ + (q0) = rxp->rxq.single.only; \ + (q1) = NULL; \ + break; \ + case BNA_RXP_SLR: \ + (q0) = rxp->rxq.slr.large; \ + (q1) = rxp->rxq.slr.small; \ + break; \ + case BNA_RXP_HDS: \ + (q0) = rxp->rxq.hds.data; \ + (q1) = rxp->rxq.hds.hdr; \ + break; \ + } \ +} while (0) + +/** + * + * Function prototypes + * + */ + +/** + * BNA + */ + +/* Internal APIs */ +void bna_adv_res_req(struct bna_res_info *res_info); + +/* APIs for BNAD */ +void bna_res_req(struct bna_res_info *res_info); +void bna_init(struct bna *bna, struct bnad *bnad, + struct bfa_pcidev *pcidev, + struct bna_res_info *res_info); +void bna_uninit(struct bna *bna); +void bna_stats_get(struct bna *bna); +void bna_stats_clr(struct bna *bna); +void bna_get_perm_mac(struct bna *bna, u8 *mac); + +/* APIs for Rx */ +int bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size); + +/* APIs for RxF */ +struct bna_mac *bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod); +void bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, + struct bna_mac *mac); +struct bna_mac *bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod); +void bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, + struct bna_mac *mac); +struct bna_rit_segment * +bna_rit_mod_seg_get(struct 
bna_rit_mod *rit_mod, int seg_size);
+void bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod,
+ struct bna_rit_segment *seg);
+
+/**
+ * DEVICE
+ */
+
+/* Internal APIs */
+void bna_adv_device_init(struct bna_device *device, struct bna *bna,
+ struct bna_res_info *res_info);
+
+/* APIs for BNA */
+void bna_device_init(struct bna_device *device, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_device_uninit(struct bna_device *device);
+void bna_device_cb_port_stopped(void *arg, enum bna_cb_status status);
+int bna_device_status_get(struct bna_device *device);
+int bna_device_state_get(struct bna_device *device);
+
+/* APIs for BNAD */
+void bna_device_enable(struct bna_device *device);
+void bna_device_disable(struct bna_device *device,
+ enum bna_cleanup_type type);
+
+/**
+ * MBOX
+ */
+
+/* APIs for DEVICE */
+void bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna);
+void bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod);
+void bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod);
+void bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod);
+
+/* APIs for PORT, TX, RX */
+void bna_mbox_handler(struct bna *bna, u32 intr_status);
+void bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe);
+
+/**
+ * PORT
+ */
+
+/* APIs for BNA */
+void bna_port_init(struct bna_port *port, struct bna *bna);
+void bna_port_uninit(struct bna_port *port);
+int bna_port_state_get(struct bna_port *port);
+int bna_llport_state_get(struct bna_llport *llport);
+
+/* APIs for DEVICE */
+void bna_port_start(struct bna_port *port);
+void bna_port_stop(struct bna_port *port);
+void bna_port_fail(struct bna_port *port);
+
+/* API for RX */
+int bna_port_mtu_get(struct bna_port *port);
+void bna_llport_admin_up(struct bna_llport *llport);
+void bna_llport_admin_down(struct bna_llport *llport);
+
+/* API for BNAD */
+void bna_port_enable(struct bna_port *port);
+void bna_port_disable(struct bna_port *port, enum bna_cleanup_type type,
+ void (*cbfn)(void *, enum bna_cb_status));
+void bna_port_pause_config(struct bna_port *port,
+ struct bna_pause_config *pause_config,
+ void (*cbfn)(struct bnad *, enum bna_cb_status));
+void bna_port_mtu_set(struct bna_port *port, int mtu,
+ void (*cbfn)(struct bnad *, enum bna_cb_status));
+void bna_port_mac_get(struct bna_port *port, mac_t *mac);
+void bna_port_type_set(struct bna_port *port, enum bna_port_type type);
+void bna_port_linkcbfn_set(struct bna_port *port,
+ void (*linkcbfn)(struct bnad *,
+ enum bna_link_status));
+void bna_port_admin_up(struct bna_port *port);
+void bna_port_admin_down(struct bna_port *port);
+
+/* Callbacks for TX, RX */
+void bna_port_cb_tx_stopped(struct bna_port *port,
+ enum bna_cb_status status);
+void bna_port_cb_rx_stopped(struct bna_port *port,
+ enum bna_cb_status status);
+
+/* Callbacks for MBOX */
+void bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen,
+ int status);
+void bna_port_cb_link_down(struct bna_port *port, int status);
+
+/**
+ * IB
+ */
+
+/* APIs for BNA */
+void bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_ib_mod_uninit(struct bna_ib_mod *ib_mod);
+
+/* APIs for TX, RX */
+struct bna_ib *bna_ib_get(struct bna_ib_mod *ib_mod,
+ enum bna_intr_type intr_type, int vector);
+void bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib);
+int bna_ib_reserve_idx(struct bna_ib *ib);
+void bna_ib_release_idx(struct bna_ib *ib, int idx);
+int bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config);
+void bna_ib_start(struct bna_ib *ib);
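The IB prototypes above and just below describe an acquire/configure/start lifecycle for an interrupt block. A plausible ordering, inferred from the signatures alone (the helper, the error-return conventions, and the BNA_INTR_T_MSIX enumerator are assumptions of this sketch, since bna_types.h is not part of this hunk):

/*
 * Hypothetical usage sketch of the IB API: acquire an IB for an MSI-X
 * vector, reserve an index, configure and start it.
 */
static int demo_ib_setup(struct bna_ib_mod *ib_mod,
			 struct bna_ib_config *cfg)
{
	struct bna_ib *ib;

	ib = bna_ib_get(ib_mod, BNA_INTR_T_MSIX, BFA_MSIX_RME_Q0);
	if (!ib)
		return -1;

	if (bna_ib_reserve_idx(ib) < 0) {	/* assumed to return < 0 on failure */
		bna_ib_put(ib_mod, ib);
		return -1;
	}

	bna_ib_config(ib, cfg);	/* cfg contents are driver-specific */
	bna_ib_start(ib);	/* later torn down with bna_ib_stop()/bna_ib_put() */
	return 0;
}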
+void bna_ib_stop(struct bna_ib *ib);
+void bna_ib_fail(struct bna_ib *ib);
+void bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo);
+
+/**
+ * TX MODULE AND TX
+ */
+
+/* Internal APIs */
+void bna_tx_prio_changed(struct bna_tx *tx, int prio);
+
+/* APIs for BNA */
+void bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_tx_mod_uninit(struct bna_tx_mod *tx_mod);
+int bna_tx_state_get(struct bna_tx *tx);
+
+/* APIs for PORT */
+void bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type);
+void bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type);
+void bna_tx_mod_fail(struct bna_tx_mod *tx_mod);
+void bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio);
+void bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link);
+
+/* APIs for BNAD */
+void bna_tx_res_req(int num_txq, int txq_depth,
+ struct bna_res_info *res_info);
+struct bna_tx *bna_tx_create(struct bna *bna, struct bnad *bnad,
+ struct bna_tx_config *tx_cfg,
+ struct bna_tx_event_cbfn *tx_cbfn,
+ struct bna_res_info *res_info, void *priv);
+void bna_tx_destroy(struct bna_tx *tx);
+void bna_tx_enable(struct bna_tx *tx);
+void bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type,
+ void (*cbfn)(void *, struct bna_tx *,
+ enum bna_cb_status));
+enum bna_cb_status
+bna_tx_prio_set(struct bna_tx *tx, int prio,
+ void (*cbfn)(struct bnad *, struct bna_tx *,
+ enum bna_cb_status));
+void bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo);
+
+/**
+ * RX MODULE, RX, RXF
+ */
+
+/* Internal APIs */
+void rxf_cb_cam_fltr_mbox_cmd(void *arg, int status);
+void rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd,
+ const struct bna_mac *mac_addr);
+void __rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status);
+void bna_rxf_adv_init(struct bna_rxf *rxf,
+ struct bna_rx *rx,
+ struct bna_rx_config *q_config);
+int rxf_process_packet_filter_ucast(struct bna_rxf *rxf);
+int rxf_process_packet_filter_promisc(struct bna_rxf *rxf);
+int rxf_process_packet_filter_default(struct bna_rxf *rxf);
+int rxf_process_packet_filter_allmulti(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_ucast(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_promisc(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_default(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_ucast(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_promisc(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_default(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf);
+
+/* APIs for BNA */
+void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_rx_mod_uninit(struct bna_rx_mod *rx_mod);
+int bna_rx_state_get(struct bna_rx *rx);
+int bna_rxf_state_get(struct bna_rxf *rxf);
+
+/* APIs for PORT */
+void bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type);
+void bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type);
+void bna_rx_mod_fail(struct bna_rx_mod *rx_mod);
+
+/* APIs for BNAD */
+void bna_rx_res_req(struct bna_rx_config *rx_config,
+ struct bna_res_info *res_info);
+struct bna_rx *bna_rx_create(struct bna *bna, struct bnad *bnad,
+ struct bna_rx_config *rx_cfg,
+ struct bna_rx_event_cbfn *rx_cbfn,
+ struct bna_res_info *res_info, void *priv);
+void bna_rx_destroy(struct bna_rx *rx);
+void bna_rx_enable(struct bna_rx *rx);
+void bna_rx_disable(struct bna_rx *rx, 
enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo); +void bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]); +void bna_rx_dim_update(struct bna_ccb *ccb); +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8* ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode rxmode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_vlan_add(struct bna_rx *rx, int vlan_id); +void bna_rx_vlan_del(struct bna_rx *rx, int vlan_id); +void bna_rx_vlanfilter_enable(struct bna_rx *rx); +void bna_rx_vlanfilter_disable(struct bna_rx *rx); +void bna_rx_rss_enable(struct bna_rx *rx); +void bna_rx_rss_disable(struct bna_rx *rx); +void bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config); +void bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, + int nvectors); +void bna_rx_hds_enable(struct bna_rx *rx, struct bna_rxf_hds *hds_config, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_hds_disable(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); + +/* RxF APIs for RX */ +void bna_rxf_start(struct bna_rxf *rxf); +void bna_rxf_stop(struct bna_rxf *rxf); +void bna_rxf_fail(struct bna_rxf *rxf); +void bna_rxf_init(struct bna_rxf *rxf, struct bna_rx *rx, + struct bna_rx_config *q_config); +void bna_rxf_uninit(struct bna_rxf *rxf); + +/* Callback from RXF to RX */ +void bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status); +void bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status); + +/** + * BNAD + */ + +/* Callbacks for BNA */ +void bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats); +void bnad_cb_stats_clr(struct bnad *bnad); + +/* Callbacks for DEVICE */ +void bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_enable_mbox_intr(struct bnad *bnad); +void bnad_cb_device_disable_mbox_intr(struct bnad *bnad); + +/* Callbacks for port */ +void bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status status); + +#endif /* __BNA_H__ */ diff --git a/drivers/net/bna/bna_ctrl.c b/drivers/net/bna/bna_ctrl.c new file mode 
100644 index 000000000000..9d41ebf41cf4 --- /dev/null +++ b/drivers/net/bna/bna_ctrl.c @@ -0,0 +1,3626 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfa_wc.h" + +/** + * MBOX + */ +static int +bna_is_aen(u8 msg_id) +{ + return (msg_id == BFI_LL_I2H_LINK_DOWN_AEN || + msg_id == BFI_LL_I2H_LINK_UP_AEN); +} + +static void +bna_mbox_aen_callback(struct bna *bna, struct bfi_mbmsg *msg) +{ + struct bfi_ll_aen *aen = (struct bfi_ll_aen *)(msg); + + switch (aen->mh.msg_id) { + case BFI_LL_I2H_LINK_UP_AEN: + bna_port_cb_link_up(&bna->port, aen, aen->reason); + break; + case BFI_LL_I2H_LINK_DOWN_AEN: + bna_port_cb_link_down(&bna->port, aen->reason); + break; + default: + break; + } +} + +static void +bna_ll_isr(void *llarg, struct bfi_mbmsg *msg) +{ + struct bna *bna = (struct bna *)(llarg); + struct bfi_ll_rsp *mb_rsp = (struct bfi_ll_rsp *)(msg); + struct bfi_mhdr *cmd_h, *rsp_h; + struct bna_mbox_qe *mb_qe = NULL; + int to_post = 0; + u8 aen = 0; + char message[BNA_MESSAGE_SIZE]; + + aen = bna_is_aen(mb_rsp->mh.msg_id); + + if (!aen) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + rsp_h = (struct bfi_mhdr *)(&mb_rsp->mh); + + if ((BFA_I2HM(cmd_h->msg_id) == rsp_h->msg_id) && + (cmd_h->mtag.i2htok == rsp_h->mtag.i2htok)) { + /* Remove the request from posted_q, update state */ + list_del(&mb_qe->qe); + bna->mbox_mod.msg_pending--; + if (list_empty(&bna->mbox_mod.posted_q)) + bna->mbox_mod.state = BNA_MBOX_FREE; + else + to_post = 1; + + /* Dispatch the cbfn */ + if (mb_qe->cbfn) + mb_qe->cbfn(mb_qe->cbarg, mb_rsp->error); + + /* Post the next entry, if needed */ + if (to_post) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + bfa_ioc_mbox_queue(&bna->device.ioc, + &mb_qe->cmd); + } + } else { + snprintf(message, BNA_MESSAGE_SIZE, + "No matching rsp for [%d:%d:%d]\n", + mb_rsp->mh.msg_class, mb_rsp->mh.msg_id, + mb_rsp->mh.mtag.i2htok); + pr_info("%s", message); + } + + } else + bna_mbox_aen_callback(bna, msg); +} + +void +bna_err_handler(struct bna *bna, u32 intr_status) +{ + u32 init_halt; + + if (intr_status & __HALT_STATUS_BITS) { + init_halt = readl(bna->device.ioc.ioc_regs.ll_halt); + init_halt &= ~__FW_INIT_HALT_P; + writel(init_halt, bna->device.ioc.ioc_regs.ll_halt); + } + + bfa_ioc_error_isr(&bna->device.ioc); +} + +void +bna_mbox_handler(struct bna *bna, u32 intr_status) +{ + if (BNA_IS_ERR_INTR(intr_status)) { + bna_err_handler(bna, intr_status); + return; + } + if (BNA_IS_MBOX_INTR(intr_status)) + bfa_ioc_mbox_isr(&bna->device.ioc); +} + +void +bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe) +{ + struct bfi_mhdr *mh; + + mh = (struct bfi_mhdr *)(&mbox_qe->cmd.msg[0]); + + mh->mtag.i2htok = htons(bna->mbox_mod.msg_ctr); + bna->mbox_mod.msg_ctr++; + bna->mbox_mod.msg_pending++; + if (bna->mbox_mod.state == BNA_MBOX_FREE) { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + 
bfa_ioc_mbox_queue(&bna->device.ioc, &mbox_qe->cmd); + bna->mbox_mod.state = BNA_MBOX_POSTED; + } else { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + } +} + +void +bna_mbox_flush_q(struct bna *bna, struct list_head *q) +{ + struct bna_mbox_qe *mb_qe = NULL; + struct bfi_mhdr *cmd_h; + struct list_head *mb_q; + void (*cbfn)(void *arg, int status); + void *cbarg; + + mb_q = &bna->mbox_mod.posted_q; + + while (!list_empty(mb_q)) { + bfa_q_deq(mb_q, &mb_qe); + cbfn = mb_qe->cbfn; + cbarg = mb_qe->cbarg; + bfa_q_qe_init(mb_qe); + bna->mbox_mod.msg_pending--; + + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + if (cbfn) + cbfn(cbarg, BNA_CB_NOT_EXEC); + } + + bna->mbox_mod.state = BNA_MBOX_FREE; +} + +void +bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod) +{ +} + +void +bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod) +{ + bna_mbox_flush_q(mbox_mod->bna, &mbox_mod->posted_q); +} + +void +bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna) +{ + bfa_ioc_mbox_regisr(&bna->device.ioc, BFI_MC_LL, bna_ll_isr, bna); + mbox_mod->state = BNA_MBOX_FREE; + mbox_mod->msg_ctr = mbox_mod->msg_pending = 0; + INIT_LIST_HEAD(&mbox_mod->posted_q); + mbox_mod->bna = bna; +} + +void +bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod) +{ + mbox_mod->bna = NULL; +} + +/** + * LLPORT + */ +#define call_llport_stop_cbfn(llport, status)\ +do {\ + if ((llport)->stop_cbfn)\ + (llport)->stop_cbfn(&(llport)->bna->port, status);\ + (llport)->stop_cbfn = NULL;\ +} while (0) + +static void bna_fw_llport_up(struct bna_llport *llport); +static void bna_fw_cb_llport_up(void *arg, int status); +static void bna_fw_llport_down(struct bna_llport *llport); +static void bna_fw_cb_llport_down(void *arg, int status); +static void bna_llport_start(struct bna_llport *llport); +static void bna_llport_stop(struct bna_llport *llport); +static void bna_llport_fail(struct bna_llport *llport); + +enum bna_llport_event { + LLPORT_E_START = 1, + LLPORT_E_STOP = 2, + LLPORT_E_FAIL = 3, + LLPORT_E_UP = 4, + LLPORT_E_DOWN = 5, + LLPORT_E_FWRESP_UP = 6, + LLPORT_E_FWRESP_DOWN = 7 +}; + +enum bna_llport_state { + BNA_LLPORT_STOPPED = 1, + BNA_LLPORT_DOWN = 2, + BNA_LLPORT_UP_RESP_WAIT = 3, + BNA_LLPORT_DOWN_RESP_WAIT = 4, + BNA_LLPORT_UP = 5, + BNA_LLPORT_LAST_RESP_WAIT = 6 +}; + +bfa_fsm_state_decl(bna_llport, stopped, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, last_resp_wait, struct bna_llport, + enum bna_llport_event); + +static struct bfa_sm_table llport_sm_table[] = { + {BFA_SM(bna_llport_sm_stopped), BNA_LLPORT_STOPPED}, + {BFA_SM(bna_llport_sm_down), BNA_LLPORT_DOWN}, + {BFA_SM(bna_llport_sm_up_resp_wait), BNA_LLPORT_UP_RESP_WAIT}, + {BFA_SM(bna_llport_sm_down_resp_wait), BNA_LLPORT_DOWN_RESP_WAIT}, + {BFA_SM(bna_llport_sm_up), BNA_LLPORT_UP}, + {BFA_SM(bna_llport_sm_last_resp_wait), BNA_LLPORT_LAST_RESP_WAIT} +}; + +static void +bna_llport_sm_stopped_entry(struct bna_llport *llport) +{ + llport->bna->port.link_cbfn((llport)->bna->bnad, BNA_LINK_DOWN); + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); +} + +static void +bna_llport_sm_stopped(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case 
LLPORT_E_START: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + case LLPORT_E_STOP: + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); + break; + + case LLPORT_E_FAIL: + break; + + case LLPORT_E_DOWN: + /* This event is received due to Rx objects failing */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + case LLPORT_E_FWRESP_DOWN: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_entry(struct bna_llport *llport) +{ + bnad_cb_port_link_status((llport)->bna->bnad, BNA_LINK_DOWN); +} + +static void +bna_llport_sm_down(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_up() here. That would overrun the + * mbox, since the down_resp_wait -> up_resp_wait transition on event + * LLPORT_E_UP happens while a response is still outstanding + */ +} + +static void +bna_llport_sm_up_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up); + break; + + case LLPORT_E_FWRESP_DOWN: + /* down_resp_wait -> up_resp_wait transition on LLPORT_E_UP */ + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_down() here. That would overrun the + * mbox, since the up_resp_wait -> down_resp_wait transition on event + * LLPORT_E_DOWN happens while a response is still outstanding + */ +}
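+ +/* + * While a firmware response is outstanding, LLPORT_E_UP / LLPORT_E_DOWN only + * flip between the two resp_wait states; the opposite admin command is sent + * from the FWRESP handler once the pending response arrives. + */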
+ +static void +bna_llport_sm_down_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->down_resp_wait transition on LLPORT_E_DOWN */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_up(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + bna_fw_llport_down(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_last_resp_wait_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_last_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + /** + * This event is received due to Rx objects stopping in + * parallel to llport + */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->last_resp_wait transition on LLPORT_E_STOP */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_fw_llport_admin_up(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_ENABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_up, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_up(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_up(llport); +} + +static void +bna_fw_cb_llport_up(void *arg, int status) +{ + struct bna_llport *llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_UP); +} + +static void +bna_fw_llport_admin_down(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_DISABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_down, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_down(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_down(llport); +} + +static void +bna_fw_cb_llport_down(void *arg, int status) +{ + struct bna_llport 
*llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_DOWN); +} + +void +bna_port_cb_llport_stopped(struct bna_port *port, + enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +static void +bna_llport_init(struct bna_llport *llport, struct bna *bna) +{ + llport->flags |= BNA_LLPORT_F_ENABLED; + llport->type = BNA_PORT_T_REGULAR; + llport->bna = bna; + + llport->link_status = BNA_LINK_DOWN; + + llport->admin_up_count = 0; + + llport->stop_cbfn = NULL; + + bfa_q_qe_init(&llport->mbox_qe.qe); + + bfa_fsm_set_state(llport, bna_llport_sm_stopped); +} + +static void +bna_llport_uninit(struct bna_llport *llport) +{ + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + llport->bna = NULL; +} + +static void +bna_llport_start(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_START); +} + +static void +bna_llport_stop(struct bna_llport *llport) +{ + llport->stop_cbfn = bna_port_cb_llport_stopped; + + bfa_fsm_send_event(llport, LLPORT_E_STOP); +} + +static void +bna_llport_fail(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_FAIL); +} + +int +bna_llport_state_get(struct bna_llport *llport) +{ + return bfa_sm_to_state(llport_sm_table, llport->fsm); +} + +void +bna_llport_admin_up(struct bna_llport *llport) +{ + llport->admin_up_count++; + + if (llport->admin_up_count == 1) { + llport->flags |= BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); + } +} + +void +bna_llport_admin_down(struct bna_llport *llport) +{ + llport->admin_up_count--; + + if (llport->admin_up_count == 0) { + llport->flags &= ~BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); + } +} + +/** + * PORT + */ +#define bna_port_chld_start(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_llport_start(&(port)->llport);\ + bna_tx_mod_start(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_stop(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_llport_stop(&(port)->llport);\ + bna_tx_mod_stop(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_fail(port)\ +do {\ + bna_llport_fail(&(port)->llport);\ + bna_tx_mod_fail(&(port)->bna->tx_mod);\ + bna_rx_mod_fail(&(port)->bna->rx_mod);\ +} while (0) + +#define bna_port_rx_start(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_rx_stop(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define call_port_stop_cbfn(port, status)\ +do {\ + if ((port)->stop_cbfn)\ + (port)->stop_cbfn((port)->stop_cbarg, status);\ + 
(port)->stop_cbfn = NULL;\ + (port)->stop_cbarg = NULL;\ +} while (0) + +#define call_port_pause_cbfn(port, status)\ +do {\ + if ((port)->pause_cbfn)\ + (port)->pause_cbfn((port)->bna->bnad, status);\ + (port)->pause_cbfn = NULL;\ +} while (0) + +#define call_port_mtu_cbfn(port, status)\ +do {\ + if ((port)->mtu_cbfn)\ + (port)->mtu_cbfn((port)->bna->bnad, status);\ + (port)->mtu_cbfn = NULL;\ +} while (0) + +static void bna_fw_pause_set(struct bna_port *port); +static void bna_fw_cb_pause_set(void *arg, int status); +static void bna_fw_mtu_set(struct bna_port *port); +static void bna_fw_cb_mtu_set(void *arg, int status); + +enum bna_port_event { + PORT_E_START = 1, + PORT_E_STOP = 2, + PORT_E_FAIL = 3, + PORT_E_PAUSE_CFG = 4, + PORT_E_MTU_CFG = 5, + PORT_E_CHLD_STOPPED = 6, + PORT_E_FWRESP_PAUSE = 7, + PORT_E_FWRESP_MTU = 8 +}; + +enum bna_port_state { + BNA_PORT_STOPPED = 1, + BNA_PORT_MTU_INIT_WAIT = 2, + BNA_PORT_PAUSE_INIT_WAIT = 3, + BNA_PORT_LAST_RESP_WAIT = 4, + BNA_PORT_STARTED = 5, + BNA_PORT_PAUSE_CFG_WAIT = 6, + BNA_PORT_RX_STOP_WAIT = 7, + BNA_PORT_MTU_CFG_WAIT = 8, + BNA_PORT_CHLD_STOP_WAIT = 9 +}; + +bfa_fsm_state_decl(bna_port, stopped, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, last_resp_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, started, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, rx_stop_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, chld_stop_wait, struct bna_port, + enum bna_port_event); + +static struct bfa_sm_table port_sm_table[] = { + {BFA_SM(bna_port_sm_stopped), BNA_PORT_STOPPED}, + {BFA_SM(bna_port_sm_mtu_init_wait), BNA_PORT_MTU_INIT_WAIT}, + {BFA_SM(bna_port_sm_pause_init_wait), BNA_PORT_PAUSE_INIT_WAIT}, + {BFA_SM(bna_port_sm_last_resp_wait), BNA_PORT_LAST_RESP_WAIT}, + {BFA_SM(bna_port_sm_started), BNA_PORT_STARTED}, + {BFA_SM(bna_port_sm_pause_cfg_wait), BNA_PORT_PAUSE_CFG_WAIT}, + {BFA_SM(bna_port_sm_rx_stop_wait), BNA_PORT_RX_STOP_WAIT}, + {BFA_SM(bna_port_sm_mtu_cfg_wait), BNA_PORT_MTU_CFG_WAIT}, + {BFA_SM(bna_port_sm_chld_stop_wait), BNA_PORT_CHLD_STOP_WAIT} +}; + +static void +bna_port_sm_stopped_entry(struct bna_port *port) +{ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + call_port_stop_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_stopped(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_START: + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + break; + + case PORT_E_STOP: + call_port_stop_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_FAIL: + /* No-op */ + break; + + case PORT_E_PAUSE_CFG: + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_MTU_CFG: + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_CHLD_STOPPED: + /** + * This event is received due to LLPort, Tx and Rx objects + * failing + */ + /* No-op */ + break; + + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void 
+bna_port_sm_mtu_init_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_init_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + /* No-op */ + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_MTU: + if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bna_fw_mtu_set(port); + } else { + bfa_fsm_set_state(port, bna_port_sm_pause_init_wait); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_init_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_init_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + port->flags |= BNA_PORT_F_PAUSE_CHANGED; + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_PAUSE: + if (port->flags & BNA_PORT_F_PAUSE_CHANGED) { + port->flags &= ~BNA_PORT_F_PAUSE_CHANGED; + bna_fw_pause_set(port); + } else if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + } else { + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_chld_start(port); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_last_resp_wait_entry(struct bna_port *port) +{ +} + +static void +bna_port_sm_last_resp_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_started_entry(struct bna_port *port) +{ + /** + * NOTE: Do not call bna_port_chld_start() here, since it will be + * inadvertently called during pause_cfg_wait->started transition + * as well + */ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_started(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_chld_stop_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_PAUSE_CFG: + bfa_fsm_set_state(port, bna_port_sm_pause_cfg_wait); + break; + + case PORT_E_MTU_CFG: + bfa_fsm_set_state(port, bna_port_sm_rx_stop_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_cfg_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_PAUSE: + bfa_fsm_set_state(port, bna_port_sm_started); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_rx_stop_wait_entry(struct bna_port *port) +{ + bna_port_rx_stop(port); +} 
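+ +/* + * MTU change on a started port: Rx is stopped first (rx_stop_wait), the new + * MTU is then programmed into firmware (mtu_cfg_wait), and Rx is restarted + * once the firmware response arrives. + */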
+ +static void +bna_port_sm_rx_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_mtu_cfg_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_mtu_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_cfg_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_rx_start(port); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_chld_stop_wait_entry(struct bna_port *port) +{ + bna_port_chld_stop(port); +} + +static void +bna_port_sm_chld_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_fw_pause_set(struct bna_port *port) +{ + struct bfi_ll_set_pause_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_SET_PAUSE_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.tx_pause = port->pause_config.tx_pause; + ll_req.rx_pause = port->pause_config.rx_pause; + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_pause_set, port); + + bna_mbox_send(port->bna, &port->mbox_qe); +} + +static void +bna_fw_cb_pause_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_PAUSE); +} + +static void +bna_fw_mtu_set(struct bna_port *port) +{ + struct bfi_ll_mtu_info_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_MTU_INFO_REQ, 0); + ll_req.mtu = htons((u16)port->mtu); + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_mtu_set, port); + bna_mbox_send(port->bna, &port->mbox_qe); +} + +static void +bna_fw_cb_mtu_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_MTU); +} + +static void +bna_port_cb_chld_stopped(void *arg) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_fsm_send_event(port, PORT_E_CHLD_STOPPED); +} + +void +bna_port_init(struct bna_port *port, struct bna *bna) +{ + port->bna = bna; + port->flags = 0; + port->mtu = 0; + port->type = BNA_PORT_T_REGULAR; + + port->link_cbfn = bnad_cb_port_link_status; + + port->chld_stop_wc.wc_resume = bna_port_cb_chld_stopped; + port->chld_stop_wc.wc_cbarg = port; + port->chld_stop_wc.wc_count = 0; + + port->stop_cbfn = NULL; + port->stop_cbarg = NULL; + + port->pause_cbfn = NULL; + + port->mtu_cbfn = NULL; + + bfa_q_qe_init(&port->mbox_qe.qe); + + bfa_fsm_set_state(port, bna_port_sm_stopped); + + bna_llport_init(&port->llport, bna); +} + +void +bna_port_uninit(struct bna_port *port) +{ + bna_llport_uninit(&port->llport); + + port->flags = 0; + + port->bna = NULL; +} + +int +bna_port_state_get(struct bna_port *port) +{ + return bfa_sm_to_state(port_sm_table, port->fsm); +} + +void +bna_port_start(struct bna_port 
*port) +{ + port->flags |= BNA_PORT_F_DEVICE_READY; + if (port->flags & BNA_PORT_F_ENABLED) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_stop(struct bna_port *port) +{ + port->stop_cbfn = bna_device_cb_port_stopped; + port->stop_cbarg = &port->bna->device; + + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_fail(struct bna_port *port) +{ + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_FAIL); +} + +void +bna_port_cb_tx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_rx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen, + int status) +{ + int i; + u8 prio_map; + + port->llport.link_status = BNA_LINK_UP; + if (aen->cee_linkup) + port->llport.link_status = BNA_CEE_UP; + + /* Compute the priority */ + prio_map = aen->prio_map; + if (prio_map) { + for (i = 0; i < 8; i++) { + if ((prio_map >> i) & 0x1) + break; + } + port->priority = i; + } else + port->priority = 0; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, aen->cee_linkup); + bna_tx_mod_prio_changed(&port->bna->tx_mod, port->priority); + port->link_cbfn(port->bna->bnad, port->llport.link_status); +} + +void +bna_port_cb_link_down(struct bna_port *port, int status) +{ + port->llport.link_status = BNA_LINK_DOWN; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, BNA_LINK_DOWN); + port->link_cbfn(port->bna->bnad, BNA_LINK_DOWN); +} + +int +bna_port_mtu_get(struct bna_port *port) +{ + return port->mtu; +} + +void +bna_port_enable(struct bna_port *port) +{ + if (port->fsm != (bfa_sm_t)bna_port_sm_stopped) + return; + + port->flags |= BNA_PORT_F_ENABLED; + + if (port->flags & BNA_PORT_F_DEVICE_READY) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_disable(struct bna_port *port, enum bna_cleanup_type type, + void (*cbfn)(void *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(port->bna->bnad, BNA_CB_SUCCESS); + return; + } + + port->stop_cbfn = cbfn; + port->stop_cbarg = port->bna->bnad; + + port->flags &= ~BNA_PORT_F_ENABLED; + + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_pause_config(struct bna_port *port, + struct bna_pause_config *pause_config, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->pause_config = *pause_config; + + port->pause_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_PAUSE_CFG); +} + +void +bna_port_mtu_set(struct bna_port *port, int mtu, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->mtu = mtu; + + port->mtu_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_MTU_CFG); +} + +void +bna_port_mac_get(struct bna_port *port, mac_t *mac) +{ + *mac = bfa_ioc_get_mac(&port->bna->device.ioc); +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_type_set(struct bna_port *port, enum bna_port_type type) +{ + port->type = type; + port->llport.type = type; +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_linkcbfn_set(struct bna_port *port, + void (*linkcbfn)(struct bnad *, enum bna_link_status)) +{ + port->link_cbfn = linkcbfn; +} + +void +bna_port_admin_up(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (llport->flags & BNA_LLPORT_F_ENABLED) + return; + + llport->flags |= BNA_LLPORT_F_ENABLED; + + if (llport->flags & 
BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); +} + +void +bna_port_admin_down(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (!(llport->flags & BNA_LLPORT_F_ENABLED)) + return; + + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + if (llport->flags & BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); +} + +/** + * DEVICE + */ +#define enable_mbox_intr(_device)\ +do {\ + u32 intr_status;\ + bna_intr_status_get((_device)->bna, intr_status);\ + bnad_cb_device_enable_mbox_intr((_device)->bna->bnad);\ + bna_mbox_intr_enable((_device)->bna);\ +} while (0) + +#define disable_mbox_intr(_device)\ +do {\ + bna_mbox_intr_disable((_device)->bna);\ + bnad_cb_device_disable_mbox_intr((_device)->bna->bnad);\ +} while (0) + +const struct bna_chip_regs_offset reg_offset[] = +{{HOST_PAGE_NUM_FN0, HOSTFN0_INT_STATUS, + HOSTFN0_INT_MASK, HOST_MSIX_ERR_INDEX_FN0}, +{HOST_PAGE_NUM_FN1, HOSTFN1_INT_STATUS, + HOSTFN1_INT_MASK, HOST_MSIX_ERR_INDEX_FN1}, +{HOST_PAGE_NUM_FN2, HOSTFN2_INT_STATUS, + HOSTFN2_INT_MASK, HOST_MSIX_ERR_INDEX_FN2}, +{HOST_PAGE_NUM_FN3, HOSTFN3_INT_STATUS, + HOSTFN3_INT_MASK, HOST_MSIX_ERR_INDEX_FN3}, +}; + +enum bna_device_event { + DEVICE_E_ENABLE = 1, + DEVICE_E_DISABLE = 2, + DEVICE_E_IOC_READY = 3, + DEVICE_E_IOC_FAILED = 4, + DEVICE_E_IOC_DISABLED = 5, + DEVICE_E_IOC_RESET = 6, + DEVICE_E_PORT_STOPPED = 7, +}; + +enum bna_device_state { + BNA_DEVICE_STOPPED = 1, + BNA_DEVICE_IOC_READY_WAIT = 2, + BNA_DEVICE_READY = 3, + BNA_DEVICE_PORT_STOP_WAIT = 4, + BNA_DEVICE_IOC_DISABLE_WAIT = 5, + BNA_DEVICE_FAILED = 6 +}; + +bfa_fsm_state_decl(bna_device, stopped, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_ready_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ready, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, port_stop_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_disable_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, failed, struct bna_device, + enum bna_device_event); + +static struct bfa_sm_table device_sm_table[] = { + {BFA_SM(bna_device_sm_stopped), BNA_DEVICE_STOPPED}, + {BFA_SM(bna_device_sm_ioc_ready_wait), BNA_DEVICE_IOC_READY_WAIT}, + {BFA_SM(bna_device_sm_ready), BNA_DEVICE_READY}, + {BFA_SM(bna_device_sm_port_stop_wait), BNA_DEVICE_PORT_STOP_WAIT}, + {BFA_SM(bna_device_sm_ioc_disable_wait), BNA_DEVICE_IOC_DISABLE_WAIT}, + {BFA_SM(bna_device_sm_failed), BNA_DEVICE_FAILED}, +}; + +static void +bna_device_sm_stopped_entry(struct bna_device *device) +{ + if (device->stop_cbfn) + device->stop_cbfn(device->stop_cbarg, BNA_CB_SUCCESS); + + device->stop_cbfn = NULL; + device->stop_cbarg = NULL; +} + +static void +bna_device_sm_stopped(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_ENABLE: + if (device->intr_type == BNA_INTR_T_MSIX) + bna_mbox_msix_idx_set(device); + bfa_ioc_enable(&device->ioc); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_ready_wait_entry(struct bna_device *device) +{ + /** + * Do not call bfa_ioc_enable() here. 
It must be called in the + * previous state due to failed -> ioc_ready_wait transition. + */ +} + +static void +bna_device_sm_ioc_ready_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_INTERRUPT); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_READY: + bfa_fsm_set_state(device, bna_device_sm_ready); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ready_entry(struct bna_device *device) +{ + bna_mbox_mod_start(&device->bna->mbox_mod); + bna_port_start(&device->bna->port); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_SUCCESS); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_ready(struct bna_device *device, enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_port_stop_wait); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_port_stop_wait_entry(struct bna_device *device) +{ + bna_port_stop(&device->bna->port); +} + +static void +bna_device_sm_port_stop_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_PORT_STOPPED: + bna_mbox_mod_stop(&device->bna->mbox_mod); + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_FAILED: + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_disable_wait_entry(struct bna_device *device) +{ + bfa_ioc_disable(&device->ioc); +} + +static void +bna_device_sm_ioc_disable_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_IOC_DISABLED: + disable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_failed_entry(struct bna_device *device) +{ + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + bna_mbox_mod_stop(&device->bna->mbox_mod); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_FAIL); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_failed(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +/* IOC callback functions */ + +static void +bna_device_cb_iocll_ready(void *dev, enum bfa_status error) +{ + struct bna_device *device = (struct bna_device *)dev; + + if (error) + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); + else + bfa_fsm_send_event(device, DEVICE_E_IOC_READY); +} + +static void +bna_device_cb_iocll_disabled(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + 
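/* IOC disable has completed: take the device FSM out of ioc_disable_wait */ +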
bfa_fsm_send_event(device, DEVICE_E_IOC_DISABLED); +} + +static void +bna_device_cb_iocll_failed(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); +} + +static void +bna_device_cb_iocll_reset(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_RESET); +} + +static struct bfa_ioc_cbfn bfa_iocll_cbfn = { + bna_device_cb_iocll_ready, + bna_device_cb_iocll_disabled, + bna_device_cb_iocll_failed, + bna_device_cb_iocll_reset +}; + +void +bna_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u64 dma; + + device->bna = bna; + + /** + * Attach IOC and claim: + * 1. DMA memory for IOC attributes + * 2. Kernel memory for FW trace + */ + bfa_ioc_attach(&device->ioc, device, &bfa_iocll_cbfn); + bfa_ioc_pci_init(&device->ioc, &bna->pcidev, BFI_MC_LL); + + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].dma, dma); + bfa_ioc_mem_claim(&device->ioc, + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].kva, + dma); + + bna_adv_device_init(device, bna, res_info); + /* + * Initialize mbox_mod only after IOC, so that mbox handler + * registration goes through + */ + device->intr_type = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type; + device->vector = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.idl[0].vector; + bna_mbox_mod_init(&bna->mbox_mod, bna); + + device->ready_cbfn = device->stop_cbfn = NULL; + device->ready_cbarg = device->stop_cbarg = NULL; + + bfa_fsm_set_state(device, bna_device_sm_stopped); +} + +void +bna_device_uninit(struct bna_device *device) +{ + bna_mbox_mod_uninit(&device->bna->mbox_mod); + + bfa_cee_detach(&device->bna->cee); + + bfa_ioc_detach(&device->ioc); + + device->bna = NULL; +} + +void +bna_device_cb_port_stopped(void *arg, enum bna_cb_status status) +{ + struct bna_device *device = (struct bna_device *)arg; + + bfa_fsm_send_event(device, DEVICE_E_PORT_STOPPED); +} + +int +bna_device_status_get(struct bna_device *device) +{ + return (device->fsm == (bfa_fsm_t)bna_device_sm_ready); +} + +void +bna_device_enable(struct bna_device *device) +{ + if (device->fsm != (bfa_fsm_t)bna_device_sm_stopped) { + bnad_cb_device_enabled(device->bna->bnad, BNA_CB_BUSY); + return; + } + + device->ready_cbfn = bnad_cb_device_enabled; + device->ready_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_ENABLE); +} + +void +bna_device_disable(struct bna_device *device, enum bna_cleanup_type type) +{ + if (type == BNA_SOFT_CLEANUP) { + bnad_cb_device_disabled(device->bna->bnad, BNA_CB_SUCCESS); + return; + } + + device->stop_cbfn = bnad_cb_device_disabled; + device->stop_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_DISABLE); +} + +int +bna_device_state_get(struct bna_device *device) +{ + return bfa_sm_to_state(device_sm_table, device->fsm); +} + +u32 bna_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 20}, + {10, 18}, + {8, 16}, + {6, 12}, + {4, 8}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +u32 bna_napi_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 12}, + {6, 10}, + {5, 10}, + {4, 8}, + {3, 6}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +/* device */ +void +bna_adv_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u8 *kva; + u64 dma; + + device->bna = bna; + + kva = res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mdl[0].kva; + + /** + * Attach common modules (Diag, SFP, CEE, Port) and claim respective + * DMA 
memory. + */ + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].dma, dma); + kva = res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].kva; + + bfa_cee_attach(&bna->cee, &device->ioc, bna); + bfa_cee_mem_claim(&bna->cee, kva, dma); + kva += bfa_cee_meminfo(); + dma += bfa_cee_meminfo(); + +} + +/* utils */ + +void +bna_adv_res_req(struct bna_res_info *res_info) +{ + /* DMA memory for COMMON_MODULE */ + res_info[BNA_RES_MEM_T_COM].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.len = ALIGN( + bfa_cee_meminfo(), PAGE_SIZE); + + /* Virtual memory for retreiving fw_trc */ + res_info[BNA_RES_MEM_T_FWTRC].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.num = 0; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.len = 0; + + /* DMA memory for retreiving stats */ + res_info[BNA_RES_MEM_T_STATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.len = + ALIGN(BFI_HW_STATS_SIZE, PAGE_SIZE); + + /* Virtual memory for soft stats */ + res_info[BNA_RES_MEM_T_SWSTATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.len = + sizeof(struct bna_sw_stats); +} + +static void +bna_sw_stats_get(struct bna *bna, struct bna_sw_stats *sw_stats) +{ + struct bna_tx *tx; + struct bna_txq *txq; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct list_head *qe; + struct list_head *txq_qe; + struct list_head *rxp_qe; + struct list_head *mac_qe; + int i; + + sw_stats->device_state = bna_device_state_get(&bna->device); + sw_stats->port_state = bna_port_state_get(&bna->port); + sw_stats->port_flags = bna->port.flags; + sw_stats->llport_state = bna_llport_state_get(&bna->port.llport); + sw_stats->priority = bna->port.priority; + + i = 0; + list_for_each(qe, &bna->tx_mod.tx_active_q) { + tx = (struct bna_tx *)qe; + sw_stats->tx_stats[i].tx_state = bna_tx_state_get(tx); + sw_stats->tx_stats[i].tx_flags = tx->flags; + + sw_stats->tx_stats[i].num_txqs = 0; + sw_stats->tx_stats[i].txq_bmap[0] = 0; + sw_stats->tx_stats[i].txq_bmap[1] = 0; + list_for_each(txq_qe, &tx->txq_q) { + txq = (struct bna_txq *)txq_qe; + if (txq->txq_id < 32) + sw_stats->tx_stats[i].txq_bmap[0] |= + ((u32)1 << txq->txq_id); + else + sw_stats->tx_stats[i].txq_bmap[1] |= + ((u32) + 1 << (txq->txq_id - 32)); + sw_stats->tx_stats[i].num_txqs++; + } + + sw_stats->tx_stats[i].txf_id = tx->txf.txf_id; + + i++; + } + sw_stats->num_active_tx = i; + + i = 0; + list_for_each(qe, &bna->rx_mod.rx_active_q) { + rx = (struct bna_rx *)qe; + sw_stats->rx_stats[i].rx_state = bna_rx_state_get(rx); + sw_stats->rx_stats[i].rx_flags = rx->rx_flags; + + sw_stats->rx_stats[i].num_rxps = 0; + sw_stats->rx_stats[i].num_rxqs = 0; + sw_stats->rx_stats[i].rxq_bmap[0] = 0; + sw_stats->rx_stats[i].rxq_bmap[1] = 0; + sw_stats->rx_stats[i].cq_bmap[0] = 0; + sw_stats->rx_stats[i].cq_bmap[1] = 0; + list_for_each(rxp_qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)rxp_qe; + + sw_stats->rx_stats[i].num_rxqs += 1; + + if (rxp->type == BNA_RXP_SINGLE) { + if (rxp->rxq.single.only->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + 
((u32)1 << + rxp->rxq.single.only->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.single.only->rxq_id - 32)); + } + } else { + if (rxp->rxq.slr.large->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.large->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.large->rxq_id - 32)); + } + + if (rxp->rxq.slr.small->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.small->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.small->rxq_id - 32)); + } + sw_stats->rx_stats[i].num_rxqs += 1; + } + + if (rxp->cq.cq_id < 32) + sw_stats->rx_stats[i].cq_bmap[0] |= + (1 << rxp->cq.cq_id); + else + sw_stats->rx_stats[i].cq_bmap[1] |= + (1 << (rxp->cq.cq_id - 32)); + + sw_stats->rx_stats[i].num_rxps++; + } + + sw_stats->rx_stats[i].rxf_id = rx->rxf.rxf_id; + sw_stats->rx_stats[i].rxf_state = bna_rxf_state_get(&rx->rxf); + sw_stats->rx_stats[i].rxf_oper_state = rx->rxf.rxf_oper_state; + + sw_stats->rx_stats[i].num_active_ucast = 0; + if (rx->rxf.ucast_active_mac) + sw_stats->rx_stats[i].num_active_ucast++; + list_for_each(mac_qe, &rx->rxf.ucast_active_q) + sw_stats->rx_stats[i].num_active_ucast++; + + sw_stats->rx_stats[i].num_active_mcast = 0; + list_for_each(mac_qe, &rx->rxf.mcast_active_q) + sw_stats->rx_stats[i].num_active_mcast++; + + sw_stats->rx_stats[i].rxmode_active = rx->rxf.rxmode_active; + sw_stats->rx_stats[i].vlan_filter_status = + rx->rxf.vlan_filter_status; + memcpy(sw_stats->rx_stats[i].vlan_filter_table, + rx->rxf.vlan_filter_table, + sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32)); + + sw_stats->rx_stats[i].rss_status = rx->rxf.rss_status; + sw_stats->rx_stats[i].hds_status = rx->rxf.hds_status; + + i++; + } + sw_stats->num_active_rx = i; +} + +static void +bna_fw_cb_stats_get(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + u64 *p_stats; + int i, count; + int rxf_count, txf_count; + u64 rxf_bmap, txf_bmap; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + if (status == 0) { + p_stats = (u64 *)bna->stats.hw_stats; + count = sizeof(struct bfi_ll_stats) / sizeof(u64); + for (i = 0; i < count; i++) + p_stats[i] = cpu_to_be64(p_stats[i]); + + rxf_count = 0; + rxf_bmap = (u64)bna->stats.rxf_bmap[0] | + ((u64)bna->stats.rxf_bmap[1] << 32); + for (i = 0; i < BFI_LL_RXF_ID_MAX; i++) + if (rxf_bmap & ((u64)1 << i)) + rxf_count++; + + txf_count = 0; + txf_bmap = (u64)bna->stats.txf_bmap[0] | + ((u64)bna->stats.txf_bmap[1] << 32); + for (i = 0; i < BFI_LL_TXF_ID_MAX; i++) + if (txf_bmap & ((u64)1 << i)) + txf_count++; + + p_stats = (u64 *)&bna->stats.hw_stats->rxf_stats[0] + + ((rxf_count * sizeof(struct bfi_ll_stats_rxf) + + txf_count * sizeof(struct bfi_ll_stats_txf))/ + sizeof(u64)); + + /* Populate the TXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_TXF_ID_MAX - 1); i >= 0; i--) + if (txf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_txf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->txf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_txf)); + } + + /* Populate the RXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_RXF_ID_MAX - 1); i >= 0; i--) + if (rxf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_rxf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->rxf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_rxf)); + } + + bna_sw_stats_get(bna, bna->stats.sw_stats); + bnad_cb_stats_get(bna->bnad, BNA_CB_SUCCESS, &bna->stats); + } else + bnad_cb_stats_get(bna->bnad, 
BNA_CB_FAIL, &bna->stats); +} + +static void +bna_fw_stats_get(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_GET_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + + ll_req.rxf_id_mask[0] = htonl(bna->rx_mod.rxf_bmap[0]); + ll_req.rxf_id_mask[1] = htonl(bna->rx_mod.rxf_bmap[1]); + ll_req.txf_id_mask[0] = htonl(bna->tx_mod.txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(bna->tx_mod.txf_bmap[1]); + + ll_req.host_buffer.a32.addr_hi = bna->hw_stats_dma.msb; + ll_req.host_buffer.a32.addr_lo = bna->hw_stats_dma.lsb; + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_get, bna); + bna_mbox_send(bna, &bna->mbox_qe); + + bna->stats.rxf_bmap[0] = bna->rx_mod.rxf_bmap[0]; + bna->stats.rxf_bmap[1] = bna->rx_mod.rxf_bmap[1]; + bna->stats.txf_bmap[0] = bna->tx_mod.txf_bmap[0]; + bna->stats.txf_bmap[1] = bna->tx_mod.txf_bmap[1]; +} + +static void +bna_fw_cb_stats_clr(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + memset(bna->stats.sw_stats, 0, sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, sizeof(struct bfi_ll_stats)); + + bnad_cb_stats_clr(bna->bnad); +} + +static void +bna_fw_stats_clr(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + ll_req.rxf_id_mask[0] = htonl(0xffffffff); + ll_req.rxf_id_mask[1] = htonl(0xffffffff); + ll_req.txf_id_mask[0] = htonl(0xffffffff); + ll_req.txf_id_mask[1] = htonl(0xffffffff); + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_clr, bna); + bna_mbox_send(bna, &bna->mbox_qe); +} + +void +bna_stats_get(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_get(bna); + else + bnad_cb_stats_get(bna->bnad, BNA_CB_FAIL, &bna->stats); +} + +void +bna_stats_clr(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_clr(bna); + else { + memset(bna->stats.sw_stats, 0, + sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, + sizeof(struct bfi_ll_stats)); + bnad_cb_stats_clr(bna->bnad); + } +} + +/* IB */ +void +bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo) +{ + ib->ib_config.coalescing_timeo = coalescing_timeo; + + if (ib->start_count) + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); +} + +/* RxF */ +void +bna_rxf_adv_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + switch (q_config->rxp_type) { + case BNA_RXP_SINGLE: + /* No-op */ + break; + case BNA_RXP_SLR: + rxf->ctrl_flags |= BNA_RXF_CF_SM_LG_RXQ; + break; + case BNA_RXP_HDS: + rxf->hds_cfg.hdr_type = q_config->hds_config.hdr_type; + rxf->hds_cfg.header_size = + q_config->hds_config.header_size; + rxf->forced_offset = 0; + break; + default: + break; + } + + if (q_config->rss_status == BNA_STATUS_T_ENABLED) { + rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE; + rxf->rss_cfg.hash_type = q_config->rss_config.hash_type; + rxf->rss_cfg.hash_mask = q_config->rss_config.hash_mask; + memcpy(&rxf->rss_cfg.toeplitz_hash_key[0], + &q_config->rss_config.toeplitz_hash_key[0], + sizeof(rxf->rss_cfg.toeplitz_hash_key)); + } +} + +static void +rxf_fltr_mbox_cmd(struct bna_rxf *rxf, u8 cmd, enum bna_status status) +{ + struct bfi_ll_rxf_req req; + + bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0); + + req.rxf_id = rxf->rxf_id; + req.enable = status; + + 
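/* Hand the request to the mbox module, which serializes commands to the IOC */ +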
bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req), + rxf_cb_cam_fltr_mbox_cmd, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +void +__rxf_default_function_config(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna_rx_fndb_ram *rx_fndb_ram; + u32 ctrl_flags; + int i; + + rx_fndb_ram = (struct bna_rx_fndb_ram *) + BNA_GET_MEM_BASE_ADDR(rxf->rx->bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + for (i = 0; i < BFI_MAX_RXF; i++) { + if (status == BNA_STATUS_T_ENABLED) { + if (i == rxf->rxf_id) + continue; + + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags |= BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } else { + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags &= ~BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } + } +} + +int +rxf_process_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* Add additional MAC entries */ + if (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_ADD_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_active_q); + return 1; + } + + /* Delete MAC addresses previously added */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable promiscuous mode */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_PROMISC; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable default mode */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_DEFAULT; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + /* Redirect all other RxF vlan filtering to this one */ + __rxf_default_function_config(rxf, BNA_STATUS_T_ENABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move 
default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* Enable/disable allmulti mode */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* 1. delete pending ucast entries */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + /* 2. clear active ucast entries; move them to pending_add_q */ + if (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Execute pending promisc mode disable command */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 7. Clear active promisc mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* move promisc configuration from active -> pending */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. 
Execute pending default mode disable command */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 9. Clear active default mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* move default configuration from active -> pending */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Execute pending allmulti mode disable command */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 11. Clear active allmulti mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + /* move allmulti configuration from active -> pending */ + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +void +rxf_reset_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct list_head *qe; + struct bna_mac *mac; + + /* 1. Move active ucast entries to pending_add_q */ + while (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_add_q); + } + + /* 2. Throw away delete pending ucast entries */ + while (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } +} + +void +rxf_reset_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Clear pending promisc mode disable */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + } + + /* 7. Move promisc mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + } + +} + +void +rxf_reset_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. 
Clear pending default mode disable */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + } + + /* 9. Move default mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + } +} + +void +rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Clear pending allmulti mode disable */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } + + /* 11. Move allmulti mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps decide whether a h/w configuration change is needed. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There cannot be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_PROMISC)) { + /* Schedule enable */ + } else { + /* Promisc mode should not be active in the system */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_promisc_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps decide whether a h/w configuration change is needed. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There cannot be any pending disable */ + + /* Turn off the pending enable command, if any */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system promisc state should be pending */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the promisc state from the system */ + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* Promisc mode should be active in the system */ + promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +}
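+ +/* + * While default mode is active, this RxF's VLAN filter is disabled and every + * other RxF is redirected to it (see __rxf_default_function_config()). + */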
+ * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_DEFAULT)) { + /* Schedule enable */ + } else { + /* Default mode should not be active in the system */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_default_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system default state should be pending */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the default state from the system */ + bna->rxf_default_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* Default mode should be active in the system */ + default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_enable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_ALLMULTI)) { + /* Schedule enable */ + } else { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. 
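+ * Unlike promisc/default mode, allmulti needs no system-wide ownership
+ * tracking, so bna->rxf_*_id is not touched here.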
+ * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_disable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Allmulti mode should not be active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/* RxF <- bnad */ +void +bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + int need_hw_config = 0; + + /* Purge all entries from pending_add_q */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + /* Schedule all entries in active_q for deletion */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return; + } + + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- Rx */ +void +bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +void +bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_RUNNING) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +/* RxF <- bnad 
*/ +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode new_mode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + int need_hw_config = 0; + + /* Error checks */ + + if (is_promisc_enable(new_mode, bitmask)) { + /* If promisc mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_promisc_id != BFI_MAX_RXF) && + (rx->bna->rxf_promisc_id != rxf->rxf_id)) + goto err_return; + + /* If default mode is already enabled in the system */ + if (rx->bna->rxf_default_id != BFI_MAX_RXF) + goto err_return; + + /* Trying to enable promiscuous and default mode together */ + if (is_default_enable(new_mode, bitmask)) + goto err_return; + } + + if (is_default_enable(new_mode, bitmask)) { + /* If default mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_default_id != BFI_MAX_RXF) && + (rx->bna->rxf_default_id != rxf->rxf_id)) { + goto err_return; + } + + /* If promiscuous mode is already enabled in the system */ + if (rx->bna->rxf_promisc_id != BFI_MAX_RXF) + goto err_return; + } + + /* Process the commands */ + + if (is_promisc_enable(new_mode, bitmask)) { + if (rxf_promisc_enable(rxf)) + need_hw_config = 1; + } else if (is_promisc_disable(new_mode, bitmask)) { + if (rxf_promisc_disable(rxf)) + need_hw_config = 1; + } + + if (is_default_enable(new_mode, bitmask)) { + if (rxf_default_enable(rxf)) + need_hw_config = 1; + } else if (is_default_disable(new_mode, bitmask)) { + if (rxf_default_disable(rxf)) + need_hw_config = 1; + } + + if (is_allmulti_enable(new_mode, bitmask)) { + if (rxf_allmulti_enable(rxf)) + need_hw_config = 1; + } else if (is_allmulti_disable(new_mode, bitmask)) { + if (rxf_allmulti_disable(rxf)) + need_hw_config = 1; + } + + /* Trigger h/w if needed */ + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + return BNA_CB_FAIL; +} + +/* RxF <- bnad */ +void +bna_rx_rss_enable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_ENABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void +bna_rx_rss_disable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_DISABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void 
+bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING;
+	rxf->rss_status = BNA_STATUS_T_ENABLED;
+	rxf->rss_cfg = *rss_config;
+	bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+}
+
+/* RxF <- bnad */
+void
+bna_rx_vlanfilter_enable(struct bna_rx *rx)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	if (rxf->vlan_filter_status == BNA_STATUS_T_DISABLED) {
+		rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING;
+		rxf->vlan_filter_status = BNA_STATUS_T_ENABLED;
+		bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+	}
+}
+
+/* RxF <- bnad */
+void
+bna_rx_vlanfilter_disable(struct bna_rx *rx)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) {
+		rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING;
+		rxf->vlan_filter_status = BNA_STATUS_T_DISABLED;
+		bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+	}
+}
+
+/* Rx */
+
+struct bna_rxp *
+bna_rx_get_rxp(struct bna_rx *rx, int vector)
+{
+	struct bna_rxp *rxp;
+	struct list_head *qe;
+
+	list_for_each(qe, &rx->rxp_q) {
+		rxp = (struct bna_rxp *)qe;
+		if (rxp->vector == vector)
+			return rxp;
+	}
+	return NULL;
+}
+
+/*
+ * bna_rx_rss_rit_set()
+ * Sets the Q ids for the specified msi-x vectors in the RIT.
+ * Maximum rit size supported is 64, which should be the max size of the
+ * vectors array.
+ */
+
+void
+bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, int nvectors)
+{
+	int i;
+	struct bna_rxp *rxp;
+	struct bna_rxq *q0 = NULL, *q1 = NULL;
+	struct bna *bna;
+	struct bna_rxf *rxf;
+
+	/* Build the RIT contents for this RX */
+	bna = rx->bna;
+
+	rxf = &rx->rxf;
+	for (i = 0; i < nvectors; i++) {
+		rxp = bna_rx_get_rxp(rx, vectors[i]);
+
+		GET_RXQS(rxp, q0, q1);
+		rxf->rit_segment->rit[i].large_rxq_id = q0->rxq_id;
+		rxf->rit_segment->rit[i].small_rxq_id = (q1 ?
q1->rxq_id : 0); + } + + rxf->rit_segment->rit_size = nvectors; + + /* Subsequent call to enable/reconfig RSS will update the RIT in h/w */ +} + +/* Rx <- bnad */ +void +bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo) +{ + struct bna_rxp *rxp; + struct list_head *qe; + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + rxp->cq.ccb->rx_coalescing_timeo = coalescing_timeo; + bna_ib_coalescing_timeo_set(rxp->cq.ib, coalescing_timeo); + } +} + +/* Rx <- bnad */ +void +bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]) +{ + int i, j; + + for (i = 0; i < BNA_LOAD_T_MAX; i++) + for (j = 0; j < BNA_BIAS_T_MAX; j++) + bna->rx_mod.dim_vector[i][j] = vector[i][j]; +} + +/* Rx <- bnad */ +void +bna_rx_dim_update(struct bna_ccb *ccb) +{ + struct bna *bna = ccb->cq->rx->bna; + u32 load, bias; + u32 pkt_rt, small_rt, large_rt; + u8 coalescing_timeo; + + if ((ccb->pkt_rate.small_pkt_cnt == 0) && + (ccb->pkt_rate.large_pkt_cnt == 0)) + return; + + /* Arrive at preconfigured coalescing timeo value based on pkt rate */ + + small_rt = ccb->pkt_rate.small_pkt_cnt; + large_rt = ccb->pkt_rate.large_pkt_cnt; + + pkt_rt = small_rt + large_rt; + + if (pkt_rt < BNA_PKT_RATE_10K) + load = BNA_LOAD_T_LOW_4; + else if (pkt_rt < BNA_PKT_RATE_20K) + load = BNA_LOAD_T_LOW_3; + else if (pkt_rt < BNA_PKT_RATE_30K) + load = BNA_LOAD_T_LOW_2; + else if (pkt_rt < BNA_PKT_RATE_40K) + load = BNA_LOAD_T_LOW_1; + else if (pkt_rt < BNA_PKT_RATE_50K) + load = BNA_LOAD_T_HIGH_1; + else if (pkt_rt < BNA_PKT_RATE_60K) + load = BNA_LOAD_T_HIGH_2; + else if (pkt_rt < BNA_PKT_RATE_80K) + load = BNA_LOAD_T_HIGH_3; + else + load = BNA_LOAD_T_HIGH_4; + + if (small_rt > (large_rt << 1)) + bias = 0; + else + bias = 1; + + ccb->pkt_rate.small_pkt_cnt = 0; + ccb->pkt_rate.large_pkt_cnt = 0; + + coalescing_timeo = bna->rx_mod.dim_vector[load][bias]; + ccb->rx_coalescing_timeo = coalescing_timeo; + + /* Set it to IB */ + bna_ib_coalescing_timeo_set(ccb->cq->ib, coalescing_timeo); +} + +/* Tx */ +/* TX <- bnad */ +enum bna_cb_status +bna_tx_prio_set(struct bna_tx *tx, int prio, + void (*cbfn)(struct bnad *, struct bna_tx *, + enum bna_cb_status)) +{ + if (tx->flags & BNA_TX_F_PRIO_LOCK) + return BNA_CB_FAIL; + else { + tx->prio_change_cbfn = cbfn; + bna_tx_prio_changed(tx, prio); + } + + return BNA_CB_SUCCESS; +} + +/* TX <- bnad */ +void +bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_coalescing_timeo_set(txq->ib, coalescing_timeo); + } +} + +/* + * Private data + */ + +struct bna_ritseg_pool_cfg { + u32 pool_size; + u32 pool_entry_size; +}; +init_ritseg_pool(ritseg_pool_cfg); + +/* + * Private functions + */ +static void +bna_ucam_mod_init(struct bna_ucam_mod *ucam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + ucam_mod->ucmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ucam_mod->free_q); + for (i = 0; i < BFI_MAX_UCMAC; i++) { + bfa_q_qe_init(&ucam_mod->ucmac[i].qe); + list_add_tail(&ucam_mod->ucmac[i].qe, &ucam_mod->free_q); + } + + ucam_mod->bna = bna; +} + +static void +bna_ucam_mod_uninit(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &ucam_mod->free_q) + i++; + + ucam_mod->bna = NULL; +} + +static void +bna_mcam_mod_init(struct bna_mcam_mod *mcam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + 
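/* Carve the kva block reserved via BNA_RES_MEM_T_MCMAC_ARRAY into a
+	 * free list; mirrors bna_ucam_mod_init() above.
+	 */
+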
mcam_mod->mcmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&mcam_mod->free_q); + for (i = 0; i < BFI_MAX_MCMAC; i++) { + bfa_q_qe_init(&mcam_mod->mcmac[i].qe); + list_add_tail(&mcam_mod->mcmac[i].qe, &mcam_mod->free_q); + } + + mcam_mod->bna = bna; +} + +static void +bna_mcam_mod_uninit(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &mcam_mod->free_q) + i++; + + mcam_mod->bna = NULL; +} + +static void +bna_rit_mod_init(struct bna_rit_mod *rit_mod, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + int offset; + + rit_mod->rit = (struct bna_rit_entry *) + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mdl[0].kva; + rit_mod->rit_segment = (struct bna_rit_segment *) + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mdl[0].kva; + + count = 0; + offset = 0; + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + INIT_LIST_HEAD(&rit_mod->rit_seg_pool[i]); + for (j = 0; j < ritseg_pool_cfg[i].pool_size; j++) { + bfa_q_qe_init(&rit_mod->rit_segment[count].qe); + rit_mod->rit_segment[count].max_rit_size = + ritseg_pool_cfg[i].pool_entry_size; + rit_mod->rit_segment[count].rit_offset = offset; + rit_mod->rit_segment[count].rit = + &rit_mod->rit[offset]; + list_add_tail(&rit_mod->rit_segment[count].qe, + &rit_mod->rit_seg_pool[i]); + count++; + offset += ritseg_pool_cfg[i].pool_entry_size; + } + } +} + +static void +bna_rit_mod_uninit(struct bna_rit_mod *rit_mod) +{ + struct bna_rit_segment *rit_segment; + struct list_head *qe; + int i; + int j; + + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + j = 0; + list_for_each(qe, &rit_mod->rit_seg_pool[i]) { + rit_segment = (struct bna_rit_segment *)qe; + j++; + } + } +} + +/* + * Public functions + */ + +/* Called during probe(), before calling bna_init() */ +void +bna_res_req(struct bna_res_info *res_info) +{ + bna_adv_res_req(res_info); + + /* DMA memory for retrieving IOC attributes */ + res_info[BNA_RES_MEM_T_ATTR].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.len = + ALIGN(bfa_ioc_meminfo(), PAGE_SIZE); + + /* DMA memory for index segment of an IB */ + res_info[BNA_RES_MEM_T_IBIDX].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.len = + BFI_IBIDX_SIZE * BFI_IBIDX_MAX_SEGSIZE; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.num = BFI_MAX_IB; + + /* Virtual memory for IB objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IB_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_ib); + + /* Virtual memory for intr objects - stored by IB module */ + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_intr); + + /* Virtual memory for idx_seg objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + 
res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.len = + BFI_IBIDX_TOTAL_SEGS * sizeof(struct bna_ibidx_seg); + + /* Virtual memory for Tx objects - stored by Tx module */ + res_info[BNA_RES_MEM_T_TX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_tx); + + /* Virtual memory for TxQ - stored by Tx module */ + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_txq); + + /* Virtual memory for Rx objects - stored by Rx module */ + res_info[BNA_RES_MEM_T_RX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rx); + + /* Virtual memory for RxPath - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxp); + + /* Virtual memory for RxQ - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxq); + + /* Virtual memory for Unicast MAC address - stored by ucam module */ + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_UCMAC * sizeof(struct bna_mac); + + /* Virtual memory for Multicast MAC address - stored by mcam module */ + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_MCMAC * sizeof(struct bna_mac); + + /* Virtual memory for RIT entries */ + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.len = + BFI_MAX_RIT_SIZE * sizeof(struct bna_rit_entry); + + /* Virtual memory for RIT segment table */ + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.len = + BFI_RIT_TOTAL_SEGS * sizeof(struct bna_rit_segment); + + /* Interrupt resource for mailbox interrupt */ + res_info[BNA_RES_INTR_T_MBOX].res_type = BNA_RES_T_INTR; + 
res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.num = 1; +} + +/* Called during probe() */ +void +bna_init(struct bna *bna, struct bnad *bnad, struct bfa_pcidev *pcidev, + struct bna_res_info *res_info) +{ + bna->bnad = bnad; + bna->pcidev = *pcidev; + + bna->stats.hw_stats = (struct bfi_ll_stats *) + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].kva; + bna->hw_stats_dma.msb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.msb; + bna->hw_stats_dma.lsb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.lsb; + bna->stats.sw_stats = (struct bna_sw_stats *) + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mdl[0].kva; + + bna->regs.page_addr = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].page_addr; + bna->regs.fn_int_status = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_status; + bna->regs.fn_int_mask = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_mask; + + if (bna->pcidev.pci_func < 3) + bna->port_num = 0; + else + bna->port_num = 1; + + /* Also initializes diag, cee, sfp, phy_port and mbox_mod */ + bna_device_init(&bna->device, bna, res_info); + + bna_port_init(&bna->port, bna); + + bna_tx_mod_init(&bna->tx_mod, bna, res_info); + + bna_rx_mod_init(&bna->rx_mod, bna, res_info); + + bna_ib_mod_init(&bna->ib_mod, bna, res_info); + + bna_rit_mod_init(&bna->rit_mod, res_info); + + bna_ucam_mod_init(&bna->ucam_mod, bna, res_info); + + bna_mcam_mod_init(&bna->mcam_mod, bna, res_info); + + bna->rxf_default_id = BFI_MAX_RXF; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Mbox q element for posting stat request to f/w */ + bfa_q_qe_init(&bna->mbox_qe.qe); +} + +void +bna_uninit(struct bna *bna) +{ + bna_mcam_mod_uninit(&bna->mcam_mod); + + bna_ucam_mod_uninit(&bna->ucam_mod); + + bna_rit_mod_uninit(&bna->rit_mod); + + bna_ib_mod_uninit(&bna->ib_mod); + + bna_rx_mod_uninit(&bna->rx_mod); + + bna_tx_mod_uninit(&bna->tx_mod); + + bna_port_uninit(&bna->port); + + bna_device_uninit(&bna->device); + + bna->bnad = NULL; +} + +struct bna_mac * +bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + + if (list_empty(&ucam_mod->free_q)) + return NULL; + + bfa_q_deq(&ucam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &ucam_mod->free_q); +} + +struct bna_mac * +bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + + if (list_empty(&mcam_mod->free_q)) + return NULL; + + bfa_q_deq(&mcam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &mcam_mod->free_q); +} + +/** + * Note: This should be called in the same locking context as the call to + * bna_rit_mod_seg_get() + */ +int +bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size) +{ + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + if (seg_size <= ritseg_pool_cfg[i].pool_entry_size) + break; + } + + if (i == BFI_RIT_SEG_TOTAL_POOLS) + return 0; + + if (list_empty(&rit_mod->rit_seg_pool[i])) + return 0; + + return 1; +} + +struct bna_rit_segment * +bna_rit_mod_seg_get(struct bna_rit_mod *rit_mod, int seg_size) +{ + struct bna_rit_segment *seg; + struct list_head *qe; + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + 
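/* Pools are ordered by increasing entry size, so the first
+		 * pool that fits is the smallest suitable one.
+		 */
+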
if (seg_size <= ritseg_pool_cfg[i].pool_entry_size)
+			break;
+	}
+
+	if (i == BFI_RIT_SEG_TOTAL_POOLS)
+		return NULL;
+
+	if (list_empty(&rit_mod->rit_seg_pool[i]))
+		return NULL;
+
+	bfa_q_deq(&rit_mod->rit_seg_pool[i], &qe);
+	seg = (struct bna_rit_segment *)qe;
+	bfa_q_qe_init(&seg->qe);
+	seg->rit_size = seg_size;
+
+	return seg;
+}
+
+void
+bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod,
+			struct bna_rit_segment *seg)
+{
+	int i;
+
+	/* Select the pool for seg->max_rit_size */
+	for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) {
+		if (seg->max_rit_size == ritseg_pool_cfg[i].pool_entry_size)
+			break;
+	}
+
+	seg->rit_size = 0;
+	list_add_tail(&seg->qe, &rit_mod->rit_seg_pool[i]);
+}
diff --git a/drivers/net/bna/bna_hw.h b/drivers/net/bna/bna_hw.h
new file mode 100644
index 000000000000..67eb376c5c7e
--- /dev/null
+++ b/drivers/net/bna/bna_hw.h
@@ -0,0 +1,1491 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ *
+ * File for interrupt macros and functions
+ */
+
+#ifndef __BNA_HW_H__
+#define __BNA_HW_H__
+
+#include "bfi_ctreg.h"
+
+/**
+ *
+ * SW imposed limits
+ *
+ */
+
+#ifndef BNA_BIOS_BUILD
+
+#define BFI_MAX_TXQ			64
+#define BFI_MAX_RXQ			64
+#define BFI_MAX_RXF			64
+#define BFI_MAX_IB			128
+#define BFI_MAX_RIT_SIZE		256
+#define BFI_RSS_RIT_SIZE		64
+#define BFI_NONRSS_RIT_SIZE		1
+#define BFI_MAX_UCMAC			256
+#define BFI_MAX_MCMAC			512
+#define BFI_IBIDX_SIZE			4
+#define BFI_MAX_VLAN			4095
+
+/**
+ * There are 3 free IB index pools:
+ *	pool1: 116 segments of 1 index each
+ *	pool2: 2 segments of 2 indexes each
+ *	pool8: 1 segment of 8 indexes
+ */
+#define BFI_IBIDX_POOL1_SIZE		116
+#define BFI_IBIDX_POOL1_ENTRY_SIZE	1
+#define BFI_IBIDX_POOL2_SIZE		2
+#define BFI_IBIDX_POOL2_ENTRY_SIZE	2
+#define BFI_IBIDX_POOL8_SIZE		1
+#define BFI_IBIDX_POOL8_ENTRY_SIZE	8
+#define BFI_IBIDX_TOTAL_POOLS		3
+#define BFI_IBIDX_TOTAL_SEGS		119 /* (POOL1 + POOL2 + POOL8)_SIZE */
+#define BFI_IBIDX_MAX_SEGSIZE		8
+#define init_ibidx_pool(name)						\
+static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] =		\
+{									\
+	{ BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE },		\
+	{ BFI_IBIDX_POOL2_SIZE, BFI_IBIDX_POOL2_ENTRY_SIZE },		\
+	{ BFI_IBIDX_POOL8_SIZE, BFI_IBIDX_POOL8_ENTRY_SIZE }		\
+}
+
+/**
+ * There are 2 free RIT segment pools:
+ *	Pool1: 192 segments of 1 RIT entry each
+ *	Pool2: 1 segment of 64 RIT entries
+ */
+#define BFI_RIT_SEG_POOL1_SIZE		192
+#define BFI_RIT_SEG_POOL1_ENTRY_SIZE	1
+#define BFI_RIT_SEG_POOLRSS_SIZE	1
+#define BFI_RIT_SEG_POOLRSS_ENTRY_SIZE	64
+#define BFI_RIT_SEG_TOTAL_POOLS		2
+#define BFI_RIT_TOTAL_SEGS		193 /* POOL1_SIZE + POOLRSS_SIZE */
+#define init_ritseg_pool(name)						\
+static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] =	\
+{									\
+	{ BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE },	\
+	{ BFI_RIT_SEG_POOLRSS_SIZE, BFI_RIT_SEG_POOLRSS_ENTRY_SIZE }	\
+}
+
+#else /* BNA_BIOS_BUILD */
+
+#define BFI_MAX_TXQ			1
+#define BFI_MAX_RXQ			1
+#define BFI_MAX_RXF			1
+#define BFI_MAX_IB			2
+#define BFI_MAX_RIT_SIZE		2
+#define BFI_RSS_RIT_SIZE		64
+#define BFI_NONRSS_RIT_SIZE 1 +#define BFI_MAX_UCMAC 1 +#define BFI_MAX_MCMAC 8 +#define BFI_IBIDX_SIZE 4 +#define BFI_MAX_VLAN 4095 +/* There is one free pool: 2 segments of 1 index each */ +#define BFI_IBIDX_POOL1_SIZE 2 +#define BFI_IBIDX_POOL1_ENTRY_SIZE 1 +#define BFI_IBIDX_TOTAL_POOLS 1 +#define BFI_IBIDX_TOTAL_SEGS 2 /* POOL1_SIZE */ +#define BFI_IBIDX_MAX_SEGSIZE 1 +#define init_ibidx_pool(name) \ +static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] = \ +{ \ + { BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE } \ +} + +#define BFI_RIT_SEG_POOL1_SIZE 1 +#define BFI_RIT_SEG_POOL1_ENTRY_SIZE 1 +#define BFI_RIT_SEG_TOTAL_POOLS 1 +#define BFI_RIT_TOTAL_SEGS 1 /* POOL1_SIZE */ +#define init_ritseg_pool(name) \ +static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] = \ +{ \ + { BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE } \ +} + +#endif /* BNA_BIOS_BUILD */ + +#define BFI_RSS_HASH_KEY_LEN 10 + +#define BFI_COALESCING_TIMER_UNIT 5 /* 5us */ +#define BFI_MAX_COALESCING_TIMEO 0xFF /* in 5us units */ +#define BFI_MAX_INTERPKT_COUNT 0xFF +#define BFI_MAX_INTERPKT_TIMEO 0xF /* in 0.5us units */ +#define BFI_TX_COALESCING_TIMEO 20 /* 20 * 5 = 100us */ +#define BFI_TX_INTERPKT_COUNT 32 +#define BFI_RX_COALESCING_TIMEO 12 /* 12 * 5 = 60us */ +#define BFI_RX_INTERPKT_COUNT 6 /* Pkt Cnt = 6 */ +#define BFI_RX_INTERPKT_TIMEO 3 /* 3 * 0.5 = 1.5us */ + +#define BFI_TXQ_WI_SIZE 64 /* bytes */ +#define BFI_RXQ_WI_SIZE 8 /* bytes */ +#define BFI_CQ_WI_SIZE 16 /* bytes */ +#define BFI_TX_MAX_WRR_QUOTA 0xFFF + +#define BFI_TX_MAX_VECTORS_PER_WI 4 +#define BFI_TX_MAX_VECTORS_PER_PKT 0xFF +#define BFI_TX_MAX_DATA_PER_VECTOR 0xFFFF +#define BFI_TX_MAX_DATA_PER_PKT 0xFFFFFF + +/* Small Q buffer size */ +#define BFI_SMALL_RXBUF_SIZE 128 + +/* Defined separately since BFA_FLASH_DMA_BUF_SZ is in bfa_flash.c */ +#define BFI_FLASH_DMA_BUF_SZ 0x010000 /* 64K DMA */ +#define BFI_HW_STATS_SIZE 0x4000 /* 16K DMA */ + +/** + * + * HW register offsets, macros + * + */ + +/* DMA Block Register Host Window Start Address */ +#define DMA_BLK_REG_ADDR 0x00013000 + +/* DMA Block Internal Registers */ +#define DMA_CTRL_REG0 (DMA_BLK_REG_ADDR + 0x000) +#define DMA_CTRL_REG1 (DMA_BLK_REG_ADDR + 0x004) +#define DMA_ERR_INT_STATUS (DMA_BLK_REG_ADDR + 0x008) +#define DMA_ERR_INT_ENABLE (DMA_BLK_REG_ADDR + 0x00c) +#define DMA_ERR_INT_STATUS_SET (DMA_BLK_REG_ADDR + 0x010) + +/* APP Block Register Address Offset from BAR0 */ +#define APP_BLK_REG_ADDR 0x00014000 + +/* Host Function Interrupt Mask Registers */ +#define HOSTFN0_INT_MASK (APP_BLK_REG_ADDR + 0x004) +#define HOSTFN1_INT_MASK (APP_BLK_REG_ADDR + 0x104) +#define HOSTFN2_INT_MASK (APP_BLK_REG_ADDR + 0x304) +#define HOSTFN3_INT_MASK (APP_BLK_REG_ADDR + 0x404) + +/** + * Host Function PCIe Error Registers + * Duplicates "Correctable" & "Uncorrectable" + * registers in PCIe Config space. 
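+ * One register per PCIe function, FN0..FN3 below.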
+ */ +#define FN0_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x014) +#define FN1_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x114) +#define FN2_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x314) +#define FN3_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x414) + +/* Host Function Error Type Status Registers */ +#define FN0_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x018) +#define FN1_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x118) +#define FN2_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x318) +#define FN3_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x418) + +/* Host Function Error Type Mask Registers */ +#define FN0_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x01c) +#define FN1_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x11c) +#define FN2_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x31c) +#define FN3_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x41c) + +/* Catapult Host Semaphore Status Registers (App block) */ +#define HOST_SEM_STS0_REG (APP_BLK_REG_ADDR + 0x630) +#define HOST_SEM_STS1_REG (APP_BLK_REG_ADDR + 0x634) +#define HOST_SEM_STS2_REG (APP_BLK_REG_ADDR + 0x638) +#define HOST_SEM_STS3_REG (APP_BLK_REG_ADDR + 0x63c) +#define HOST_SEM_STS4_REG (APP_BLK_REG_ADDR + 0x640) +#define HOST_SEM_STS5_REG (APP_BLK_REG_ADDR + 0x644) +#define HOST_SEM_STS6_REG (APP_BLK_REG_ADDR + 0x648) +#define HOST_SEM_STS7_REG (APP_BLK_REG_ADDR + 0x64c) + +/* PCIe Misc Register */ +#define PCIE_MISC_REG (APP_BLK_REG_ADDR + 0x200) + +/* Temp Sensor Control Registers */ +#define TEMPSENSE_CNTL_REG (APP_BLK_REG_ADDR + 0x250) +#define TEMPSENSE_STAT_REG (APP_BLK_REG_ADDR + 0x254) + +/* APP Block local error registers */ +#define APP_LOCAL_ERR_STAT (APP_BLK_REG_ADDR + 0x258) +#define APP_LOCAL_ERR_MSK (APP_BLK_REG_ADDR + 0x25c) + +/* PCIe Link Error registers */ +#define PCIE_LNK_ERR_STAT (APP_BLK_REG_ADDR + 0x260) +#define PCIE_LNK_ERR_MSK (APP_BLK_REG_ADDR + 0x264) + +/** + * FCoE/FIP Ethertype Register + * 31:16 -- Chip wide value for FIP type + * 15:0 -- Chip wide value for FCoE type + */ +#define FCOE_FIP_ETH_TYPE (APP_BLK_REG_ADDR + 0x280) + +/** + * Reserved Ethertype Register + * 31:16 -- Reserved + * 15:0 -- Other ethertype + */ +#define RESV_ETH_TYPE (APP_BLK_REG_ADDR + 0x284) + +/** + * Host Command Status Registers + * Each set consists of 3 registers : + * clear, set, cmd + * 16 such register sets in all + * See catapult_spec.pdf for detailed functionality + * Put each type in a single macro accessed by _num ? 
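+ * Each set is 0x10 bytes apart, so such a macro (hypothetical, not
+ * used in this driver) could be:
+ *   #define HOST_CMDSTS_CLR_REG(_num) \
+ *	(APP_BLK_REG_ADDR + 0x500 + ((_num) << 4))
+ * with the set register at +0x4 and the cmd register at +0x8.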
+ */ +#define HOST_CMDSTS0_CLR_REG (APP_BLK_REG_ADDR + 0x500) +#define HOST_CMDSTS0_SET_REG (APP_BLK_REG_ADDR + 0x504) +#define HOST_CMDSTS0_REG (APP_BLK_REG_ADDR + 0x508) +#define HOST_CMDSTS1_CLR_REG (APP_BLK_REG_ADDR + 0x510) +#define HOST_CMDSTS1_SET_REG (APP_BLK_REG_ADDR + 0x514) +#define HOST_CMDSTS1_REG (APP_BLK_REG_ADDR + 0x518) +#define HOST_CMDSTS2_CLR_REG (APP_BLK_REG_ADDR + 0x520) +#define HOST_CMDSTS2_SET_REG (APP_BLK_REG_ADDR + 0x524) +#define HOST_CMDSTS2_REG (APP_BLK_REG_ADDR + 0x528) +#define HOST_CMDSTS3_CLR_REG (APP_BLK_REG_ADDR + 0x530) +#define HOST_CMDSTS3_SET_REG (APP_BLK_REG_ADDR + 0x534) +#define HOST_CMDSTS3_REG (APP_BLK_REG_ADDR + 0x538) +#define HOST_CMDSTS4_CLR_REG (APP_BLK_REG_ADDR + 0x540) +#define HOST_CMDSTS4_SET_REG (APP_BLK_REG_ADDR + 0x544) +#define HOST_CMDSTS4_REG (APP_BLK_REG_ADDR + 0x548) +#define HOST_CMDSTS5_CLR_REG (APP_BLK_REG_ADDR + 0x550) +#define HOST_CMDSTS5_SET_REG (APP_BLK_REG_ADDR + 0x554) +#define HOST_CMDSTS5_REG (APP_BLK_REG_ADDR + 0x558) +#define HOST_CMDSTS6_CLR_REG (APP_BLK_REG_ADDR + 0x560) +#define HOST_CMDSTS6_SET_REG (APP_BLK_REG_ADDR + 0x564) +#define HOST_CMDSTS6_REG (APP_BLK_REG_ADDR + 0x568) +#define HOST_CMDSTS7_CLR_REG (APP_BLK_REG_ADDR + 0x570) +#define HOST_CMDSTS7_SET_REG (APP_BLK_REG_ADDR + 0x574) +#define HOST_CMDSTS7_REG (APP_BLK_REG_ADDR + 0x578) +#define HOST_CMDSTS8_CLR_REG (APP_BLK_REG_ADDR + 0x580) +#define HOST_CMDSTS8_SET_REG (APP_BLK_REG_ADDR + 0x584) +#define HOST_CMDSTS8_REG (APP_BLK_REG_ADDR + 0x588) +#define HOST_CMDSTS9_CLR_REG (APP_BLK_REG_ADDR + 0x590) +#define HOST_CMDSTS9_SET_REG (APP_BLK_REG_ADDR + 0x594) +#define HOST_CMDSTS9_REG (APP_BLK_REG_ADDR + 0x598) +#define HOST_CMDSTS10_CLR_REG (APP_BLK_REG_ADDR + 0x5A0) +#define HOST_CMDSTS10_SET_REG (APP_BLK_REG_ADDR + 0x5A4) +#define HOST_CMDSTS10_REG (APP_BLK_REG_ADDR + 0x5A8) +#define HOST_CMDSTS11_CLR_REG (APP_BLK_REG_ADDR + 0x5B0) +#define HOST_CMDSTS11_SET_REG (APP_BLK_REG_ADDR + 0x5B4) +#define HOST_CMDSTS11_REG (APP_BLK_REG_ADDR + 0x5B8) +#define HOST_CMDSTS12_CLR_REG (APP_BLK_REG_ADDR + 0x5C0) +#define HOST_CMDSTS12_SET_REG (APP_BLK_REG_ADDR + 0x5C4) +#define HOST_CMDSTS12_REG (APP_BLK_REG_ADDR + 0x5C8) +#define HOST_CMDSTS13_CLR_REG (APP_BLK_REG_ADDR + 0x5D0) +#define HOST_CMDSTS13_SET_REG (APP_BLK_REG_ADDR + 0x5D4) +#define HOST_CMDSTS13_REG (APP_BLK_REG_ADDR + 0x5D8) +#define HOST_CMDSTS14_CLR_REG (APP_BLK_REG_ADDR + 0x5E0) +#define HOST_CMDSTS14_SET_REG (APP_BLK_REG_ADDR + 0x5E4) +#define HOST_CMDSTS14_REG (APP_BLK_REG_ADDR + 0x5E8) +#define HOST_CMDSTS15_CLR_REG (APP_BLK_REG_ADDR + 0x5F0) +#define HOST_CMDSTS15_SET_REG (APP_BLK_REG_ADDR + 0x5F4) +#define HOST_CMDSTS15_REG (APP_BLK_REG_ADDR + 0x5F8) + +/** + * LPU0 Block Register Address Offset from BAR0 + * Range 0x18000 - 0x18033 + */ +#define LPU0_BLK_REG_ADDR 0x00018000 + +/** + * LPU0 Registers + * Should they be directly used from host, + * except for diagnostics ? + * CTL_REG : Control register + * CMD_REG : Triggers exec. of cmd. 
in
+ * Mailbox memory
+ */
+#define LPU0_MBOX_CTL_REG	(LPU0_BLK_REG_ADDR + 0x000)
+#define LPU0_MBOX_CMD_REG	(LPU0_BLK_REG_ADDR + 0x004)
+#define LPU0_MBOX_LINK_0REG	(LPU0_BLK_REG_ADDR + 0x008)
+#define LPU1_MBOX_LINK_0REG	(LPU0_BLK_REG_ADDR + 0x00c)
+#define LPU0_MBOX_STATUS_0REG	(LPU0_BLK_REG_ADDR + 0x010)
+#define LPU1_MBOX_STATUS_0REG	(LPU0_BLK_REG_ADDR + 0x014)
+#define LPU0_ERR_STATUS_REG	(LPU0_BLK_REG_ADDR + 0x018)
+#define LPU0_ERR_SET_REG	(LPU0_BLK_REG_ADDR + 0x020)
+
+/**
+ * LPU1 Block Register Address Offset from BAR0
+ * Range 0x18400 - 0x18433
+ */
+#define LPU1_BLK_REG_ADDR	0x00018400
+
+/**
+ * LPU1 Registers
+ * Same as LPU0 registers above
+ */
+#define LPU1_MBOX_CTL_REG	(LPU1_BLK_REG_ADDR + 0x000)
+#define LPU1_MBOX_CMD_REG	(LPU1_BLK_REG_ADDR + 0x004)
+#define LPU0_MBOX_LINK_1REG	(LPU1_BLK_REG_ADDR + 0x008)
+#define LPU1_MBOX_LINK_1REG	(LPU1_BLK_REG_ADDR + 0x00c)
+#define LPU0_MBOX_STATUS_1REG	(LPU1_BLK_REG_ADDR + 0x010)
+#define LPU1_MBOX_STATUS_1REG	(LPU1_BLK_REG_ADDR + 0x014)
+#define LPU1_ERR_STATUS_REG	(LPU1_BLK_REG_ADDR + 0x018)
+#define LPU1_ERR_SET_REG	(LPU1_BLK_REG_ADDR + 0x020)
+
+/**
+ * PSS Block Register Address Offset from BAR0
+ * Range 0x18800 - 0x188DB
+ */
+#define PSS_BLK_REG_ADDR	0x00018800
+
+/**
+ * PSS Registers
+ * For details, see catapult_spec.pdf
+ * ERR_STATUS_REG : Indicates error in PSS module
+ * RAM_ERR_STATUS_REG : Indicates RAM module that detected error
+ */
+#define ERR_STATUS_SET		(PSS_BLK_REG_ADDR + 0x018)
+#define PSS_RAM_ERR_STATUS_REG	(PSS_BLK_REG_ADDR + 0x01C)
+
+/**
+ * PSS Semaphore Lock Registers, total 16
+ * First read when unlocked returns 0,
+ * and is set to 1, atomically.
+ * Subsequent reads return 1.
+ * To clear, set the value to 0.
+ * Range : 0x20 to 0x5c
+ */
+#define PSS_SEM_LOCK_REG(_num) \
+	(PSS_BLK_REG_ADDR + 0x020 + ((_num) << 2))
+
+/**
+ * PSS Semaphore Status Registers,
+ * corresponding to the lock registers above
+ */
+#define PSS_SEM_STATUS_REG(_num) \
+	(PSS_BLK_REG_ADDR + 0x060 + ((_num) << 2))
+
+/**
+ * Catapult CPQ Registers
+ * Defines for Mailbox Registers
+ * Used to send mailbox commands to firmware from
+ * host. The data part is written to the MBox
+ * memory, registers are used to indicate that
+ * a command is resident in memory.
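+ * Each host function has one CMD_STAT register per LPU and per
+ * direction; see the HOSTFNx_LPUy / LPUy_HOSTFNx pairs below.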
+ * + * Note : LPU0<->LPU1 mailboxes are not listed here + */ +#define CPQ_BLK_REG_ADDR 0x00019000 + +#define HOSTFN0_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x130) +#define HOSTFN0_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x134) +#define LPU0_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x138) +#define LPU1_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x13C) + +#define HOSTFN1_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x140) +#define HOSTFN1_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x144) +#define LPU0_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x148) +#define LPU1_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x14C) + +#define HOSTFN2_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x170) +#define HOSTFN2_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x174) +#define LPU0_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x178) +#define LPU1_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x17C) + +#define HOSTFN3_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x180) +#define HOSTFN3_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x184) +#define LPU0_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x188) +#define LPU1_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x18C) + +/* Host Function Force Parity Error Registers */ +#define HOSTFN0_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x120) +#define HOSTFN1_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x124) +#define HOSTFN2_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x128) +#define HOSTFN3_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x12C) + +/* LL Port[0|1] Halt Mask Registers */ +#define LL_HALT_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1A0) +#define LL_HALT_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1B0) + +/* LL Port[0|1] Error Mask Registers */ +#define LL_ERR_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1D0) +#define LL_ERR_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1D4) + +/* EMC FLI (Flash Controller) Block Register Address Offset from BAR0 */ +#define FLI_BLK_REG_ADDR 0x0001D000 + +/* EMC FLI Registers */ +#define FLI_CMD_REG (FLI_BLK_REG_ADDR + 0x000) +#define FLI_ADDR_REG (FLI_BLK_REG_ADDR + 0x004) +#define FLI_CTL_REG (FLI_BLK_REG_ADDR + 0x008) +#define FLI_WRDATA_REG (FLI_BLK_REG_ADDR + 0x00C) +#define FLI_RDDATA_REG (FLI_BLK_REG_ADDR + 0x010) +#define FLI_DEV_STATUS_REG (FLI_BLK_REG_ADDR + 0x014) +#define FLI_SIG_WD_REG (FLI_BLK_REG_ADDR + 0x018) + +/** + * RO register + * 31:16 -- Vendor Id + * 15:0 -- Device Id + */ +#define FLI_DEV_VENDOR_REG (FLI_BLK_REG_ADDR + 0x01C) +#define FLI_ERR_STATUS_REG (FLI_BLK_REG_ADDR + 0x020) + +/** + * RAD (RxAdm) Block Register Address Offset from BAR0 + * RAD0 Range : 0x20000 - 0x203FF + * RAD1 Range : 0x20400 - 0x207FF + */ +#define RAD0_BLK_REG_ADDR 0x00020000 +#define RAD1_BLK_REG_ADDR 0x00020400 + +/* RAD0 Registers */ +#define RAD0_CTL_REG (RAD0_BLK_REG_ADDR + 0x000) +#define RAD0_PE_PARM_REG (RAD0_BLK_REG_ADDR + 0x004) +#define RAD0_BCN_REG (RAD0_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD0_DEFAULT_REG (RAD0_BLK_REG_ADDR + 0x00C) + +/* Default promiscuous ID register */ +#define RAD0_PROMISC_REG (RAD0_BLK_REG_ADDR + 0x010) + +#define RAD0_BCNQ_REG (RAD0_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD0_DEFAULTQ_REG (RAD0_BLK_REG_ADDR + 0x018) + +#define RAD0_ERR_STS (RAD0_BLK_REG_ADDR + 0x01C) +#define RAD0_SET_ERR_STS (RAD0_BLK_REG_ADDR + 0x020) +#define RAD0_ERR_INT_EN (RAD0_BLK_REG_ADDR + 0x024) +#define RAD0_FIRST_ERR (RAD0_BLK_REG_ADDR + 0x028) +#define RAD0_FORCE_ERR (RAD0_BLK_REG_ADDR + 0x02C) + +#define RAD0_IF_RCVD (RAD0_BLK_REG_ADDR + 0x030) +#define RAD0_IF_RCVD_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 
0x034) +#define RAD0_IF_RCVD_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x038) +#define RAD0_IF_RCVD_VLAN (RAD0_BLK_REG_ADDR + 0x03C) +#define RAD0_IF_RCVD_UCAST (RAD0_BLK_REG_ADDR + 0x040) +#define RAD0_IF_RCVD_UCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x044) +#define RAD0_IF_RCVD_UCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x048) +#define RAD0_IF_RCVD_UCAST_VLAN (RAD0_BLK_REG_ADDR + 0x04C) +#define RAD0_IF_RCVD_MCAST (RAD0_BLK_REG_ADDR + 0x050) +#define RAD0_IF_RCVD_MCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x054) +#define RAD0_IF_RCVD_MCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x058) +#define RAD0_IF_RCVD_MCAST_VLAN (RAD0_BLK_REG_ADDR + 0x05C) +#define RAD0_IF_RCVD_BCAST (RAD0_BLK_REG_ADDR + 0x060) +#define RAD0_IF_RCVD_BCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x064) +#define RAD0_IF_RCVD_BCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x068) +#define RAD0_IF_RCVD_BCAST_VLAN (RAD0_BLK_REG_ADDR + 0x06C) +#define RAD0_DROPPED_FRAMES (RAD0_BLK_REG_ADDR + 0x070) + +#define RAD0_MAC_MAN_1H (RAD0_BLK_REG_ADDR + 0x080) +#define RAD0_MAC_MAN_1L (RAD0_BLK_REG_ADDR + 0x084) +#define RAD0_MAC_MAN_2H (RAD0_BLK_REG_ADDR + 0x088) +#define RAD0_MAC_MAN_2L (RAD0_BLK_REG_ADDR + 0x08C) +#define RAD0_MAC_MAN_3H (RAD0_BLK_REG_ADDR + 0x090) +#define RAD0_MAC_MAN_3L (RAD0_BLK_REG_ADDR + 0x094) +#define RAD0_MAC_MAN_4H (RAD0_BLK_REG_ADDR + 0x098) +#define RAD0_MAC_MAN_4L (RAD0_BLK_REG_ADDR + 0x09C) + +#define RAD0_LAST4_IP (RAD0_BLK_REG_ADDR + 0x100) + +/* RAD1 Registers */ +#define RAD1_CTL_REG (RAD1_BLK_REG_ADDR + 0x000) +#define RAD1_PE_PARM_REG (RAD1_BLK_REG_ADDR + 0x004) +#define RAD1_BCN_REG (RAD1_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD1_DEFAULT_REG (RAD1_BLK_REG_ADDR + 0x00C) + +/* Promiscuous function ID register */ +#define RAD1_PROMISC_REG (RAD1_BLK_REG_ADDR + 0x010) + +#define RAD1_BCNQ_REG (RAD1_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD1_DEFAULTQ_REG (RAD1_BLK_REG_ADDR + 0x018) + +#define RAD1_ERR_STS (RAD1_BLK_REG_ADDR + 0x01C) +#define RAD1_SET_ERR_STS (RAD1_BLK_REG_ADDR + 0x020) +#define RAD1_ERR_INT_EN (RAD1_BLK_REG_ADDR + 0x024) + +/** + * TXA Block Register Address Offset from BAR0 + * TXA0 Range : 0x21000 - 0x213FF + * TXA1 Range : 0x21400 - 0x217FF + */ +#define TXA0_BLK_REG_ADDR 0x00021000 +#define TXA1_BLK_REG_ADDR 0x00021400 + +/* TXA Registers */ +#define TXA0_CTRL_REG (TXA0_BLK_REG_ADDR + 0x000) +#define TXA1_CTRL_REG (TXA1_BLK_REG_ADDR + 0x000) + +/** + * TSO Sequence # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last seq.# for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_TCP_SEQ_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +#define TXA1_TSO_TCP_SEQ_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +/** + * TSO IP ID # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last IP ID for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_IP_INFO_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +#define TXA1_TSO_IP_INFO_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +/** + * RXA Block Register Address Offset from BAR0 + * RXA0 Range : 0x21800 - 0x21BFF + * RXA1 Range : 0x21C00 - 0x21FFF + */ +#define RXA0_BLK_REG_ADDR 0x00021800 +#define RXA1_BLK_REG_ADDR 0x00021C00 + +/* RXA Registers */ +#define RXA0_CTL_REG (RXA0_BLK_REG_ADDR + 0x040) +#define RXA1_CTL_REG (RXA1_BLK_REG_ADDR + 0x040) + +/** + * PPLB Block Register Address Offset from BAR0 + * PPLB0 Range : 0x22000 - 
0x223FF + * PPLB1 Range : 0x22400 - 0x227FF + */ +#define PLB0_BLK_REG_ADDR 0x00022000 +#define PLB1_BLK_REG_ADDR 0x00022400 + +/** + * PLB Registers + * Holds RL timer used time stamps in RLT tagged frames + */ +#define PLB0_ECM_TIMER_REG (PLB0_BLK_REG_ADDR + 0x05C) +#define PLB1_ECM_TIMER_REG (PLB1_BLK_REG_ADDR + 0x05C) + +/* Controls the rate-limiter on each of the priority class */ +#define PLB0_RL_CTL (PLB0_BLK_REG_ADDR + 0x060) +#define PLB1_RL_CTL (PLB1_BLK_REG_ADDR + 0x060) + +/** + * Max byte register, total 8, 0-7 + * see catapult_spec.pdf for details + */ +#define PLB0_RL_MAX_BC(_num) \ + (PLB0_BLK_REG_ADDR + 0x064 + ((_num) << 2)) +#define PLB1_RL_MAX_BC(_num) \ + (PLB1_BLK_REG_ADDR + 0x064 + ((_num) << 2)) + +/** + * RL Time Unit Register for priority 0-7 + * 4 bits per priority + * (2^rl_unit)*1us is the actual time period + */ +#define PLB0_RL_TU_PRIO (PLB0_BLK_REG_ADDR + 0x084) +#define PLB1_RL_TU_PRIO (PLB1_BLK_REG_ADDR + 0x084) + +/** + * RL byte count register, + * bytes transmitted in (rl_unit*1)us time period + * 1 per priority, 8 in all, 0-7. + */ +#define PLB0_RL_BYTE_CNT(_num) \ + (PLB0_BLK_REG_ADDR + 0x088 + ((_num) << 2)) +#define PLB1_RL_BYTE_CNT(_num) \ + (PLB1_BLK_REG_ADDR + 0x088 + ((_num) << 2)) + +/** + * RL Min factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MIN_REG (PLB0_BLK_REG_ADDR + 0x0A8) +#define PLB1_RL_MIN_REG (PLB1_BLK_REG_ADDR + 0x0A8) + +/** + * RL Max factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MAX_REG (PLB0_BLK_REG_ADDR + 0x0AC) +#define PLB1_RL_MAX_REG (PLB1_BLK_REG_ADDR + 0x0AC) + +/* MAC SERDES Address Paging register */ +#define PLB0_EMS_ADD_REG (PLB0_BLK_REG_ADDR + 0xD0) +#define PLB1_EMS_ADD_REG (PLB1_BLK_REG_ADDR + 0xD0) + +/* LL EMS Registers */ +#define LL_EMS0_BLK_REG_ADDR 0x00026800 +#define LL_EMS1_BLK_REG_ADDR 0x00026C00 + +/** + * BPC Block Register Address Offset from BAR0 + * BPC0 Range : 0x23000 - 0x233FF + * BPC1 Range : 0x23400 - 0x237FF + */ +#define BPC0_BLK_REG_ADDR 0x00023000 +#define BPC1_BLK_REG_ADDR 0x00023400 + +/** + * PMM Block Register Address Offset from BAR0 + * PMM0 Range : 0x23800 - 0x23BFF + * PMM1 Range : 0x23C00 - 0x23FFF + */ +#define PMM0_BLK_REG_ADDR 0x00023800 +#define PMM1_BLK_REG_ADDR 0x00023C00 + +/** + * HQM Block Register Address Offset from BAR0 + * HQM0 Range : 0x24000 - 0x243FF + * HQM1 Range : 0x24400 - 0x247FF + */ +#define HQM0_BLK_REG_ADDR 0x00024000 +#define HQM1_BLK_REG_ADDR 0x00024400 + +/** + * HQM Control Register + * Controls some aspects of IB + * See catapult_spec.pdf for details + */ +#define HQM0_CTL_REG (HQM0_BLK_REG_ADDR + 0x000) +#define HQM1_CTL_REG (HQM1_BLK_REG_ADDR + 0x000) + +/** + * HQM Stop Q Semaphore Registers. + * Only one Queue resource can be stopped at + * any given time. This register controls access + * to the single stop Q resource. 
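+ * Software must therefore serialize queue-stop operations through
+ * these semaphores, one RxQ/TxQ register pair per HQM instance.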
+ * See catapult_spec.pdf for details
+ */
+#define HQM0_RXQ_STOP_SEM	(HQM0_BLK_REG_ADDR + 0x028)
+#define HQM0_TXQ_STOP_SEM	(HQM0_BLK_REG_ADDR + 0x02C)
+#define HQM1_RXQ_STOP_SEM	(HQM1_BLK_REG_ADDR + 0x028)
+#define HQM1_TXQ_STOP_SEM	(HQM1_BLK_REG_ADDR + 0x02C)
+
+/**
+ * LUT Block Register Address Offset from BAR0
+ * LUT0 Range : 0x25800 - 0x25BFF
+ * LUT1 Range : 0x25C00 - 0x25FFF
+ */
+#define LUT0_BLK_REG_ADDR	0x00025800
+#define LUT1_BLK_REG_ADDR	0x00025C00
+
+/**
+ * LUT Registers
+ * See catapult_spec.pdf for details
+ */
+#define LUT0_ERR_STS		(LUT0_BLK_REG_ADDR + 0x000)
+#define LUT1_ERR_STS		(LUT1_BLK_REG_ADDR + 0x000)
+#define LUT0_SET_ERR_STS	(LUT0_BLK_REG_ADDR + 0x004)
+#define LUT1_SET_ERR_STS	(LUT1_BLK_REG_ADDR + 0x004)
+
+/**
+ * TRC (Debug/Trace) Register Offset from BAR0
+ * Range : 0x26000 -- 0x263FFF
+ */
+#define TRC_BLK_REG_ADDR	0x00026000
+
+/**
+ * TRC Registers
+ * See catapult_spec.pdf for details of each
+ */
+#define TRC_CTL_REG		(TRC_BLK_REG_ADDR + 0x000)
+#define TRC_MODS_REG		(TRC_BLK_REG_ADDR + 0x004)
+#define TRC_TRGC_REG		(TRC_BLK_REG_ADDR + 0x008)
+#define TRC_CNT1_REG		(TRC_BLK_REG_ADDR + 0x010)
+#define TRC_CNT2_REG		(TRC_BLK_REG_ADDR + 0x014)
+#define TRC_NXTS_REG		(TRC_BLK_REG_ADDR + 0x018)
+#define TRC_DIRR_REG		(TRC_BLK_REG_ADDR + 0x01C)
+
+/**
+ * TRC Trigger match filters, total 10
+ * Determines the trigger condition
+ */
+#define TRC_TRGM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x040 + ((_num) << 2))
+
+/**
+ * TRC Next State filters, total 10
+ * Determines the next state conditions
+ */
+#define TRC_NXTM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x080 + ((_num) << 2))
+
+/**
+ * TRC Store Match filters, total 10
+ * Determines the store conditions
+ */
+#define TRC_STRM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x0C0 + ((_num) << 2))
+
+/* DOORBELLS ACCESS */
+
+/**
+ * Catapult doorbells
+ * Each doorbell-queue set has
+ * 1 RxQ, 1 TxQ, 2 IBs in that order
+ * Size of each entry is 32 bytes, even though only 1 word
+ * is used. For the non-VM case each doorbell-q set is
+ * separated by 128 bytes, for the VM case it is separated
+ * by 4K bytes
+ * Non VM case Range : 0x38000 - 0x39FFF
+ * VM case Range : 0x100000 - 0x11FFFF
+ * The range applies to both HQMs
+ */
+#define HQM_DOORBELL_BLK_BASE_ADDR	0x00038000
+#define HQM_DOORBELL_VM_BLK_BASE_ADDR	0x00100000
+
+/* MEMORY ACCESS */
+
+/**
+ * Catapult H/W Block Memory Access Address
+ * To the host a memory space of 32K (page) is visible
+ * at a time. The address range is from 0x08000 to 0x0FFFF
+ */
+#define HW_BLK_HOST_MEM_ADDR	0x08000
+
+/**
+ * Catapult LUT Memory Access Page Numbers
+ * Range : LUT0 0xa0-0xa1
+ *	LUT1 0xa2-0xa3
+ */
+#define LUT0_MEM_BLK_BASE_PG_NUM	0x000000A0
+#define LUT1_MEM_BLK_BASE_PG_NUM	0x000000A2
+
+/**
+ * Catapult RxFn Database Memory Block Base Offset
+ *
+ * The Rx function database exists in LUT block.
+ * In PCIe space this is accessible as a 256x32
+ * bit block. Each entry in this database is 4
+ * (4 byte) words. Max. entries is 64.
+ * Address of an entry corresponding to a function
+ * = base_addr + (function_no. * 16)
+ */
+#define RX_FNDB_RAM_BASE_OFFSET	0x0000B400
+
+/**
+ * Catapult TxFn Database Memory Block Base Offset Address
+ *
+ * The Tx function database exists in LUT block.
+ * In PCIe space this is accessible as a 64x32
+ * bit block. Each entry in this database is 1
+ * (4 byte) word. Max. entries is 64.
+ * Address of an entry corresponding to a function
+ * = base_addr + (function_no. * 4)
+ */
+#define TX_FNDB_RAM_BASE_OFFSET	0x0000B800
+
+/**
+ * Catapult Unicast CAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x48 bits; mapped to PCIe space
+ * 512x32 bit blocks. For each address, bits
+ * are written in the order : [47:32] and then
+ * [31:0].
+ */
+#define UCAST_CAM_BASE_OFFSET	0x0000A800
+
+/**
+ * Catapult Unicast RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x9 bits.
+ */
+#define UCAST_RAM_BASE_OFFSET	0x0000B000
+
+/**
+ * Catapult Multicast CAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x48 bits; mapped to PCIe space
+ * 512x32 bit blocks. For each address, bits
+ * are written in the order : [47:32] and then
+ * [31:0].
+ */
+#define MCAST_CAM_BASE_OFFSET	0x0000A000
+
+/**
+ * Catapult VLAN RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 4096x66 bits; mapped to PCIe space as
+ * 8192x32 bit blocks.
+ * All the 4K entries are within the address range
+ * 0x0000 to 0x8000, so in the first LUT page.
+ */
+#define VLAN_RAM_BASE_OFFSET	0x00000000
+
+/**
+ * Catapult Tx Stats RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 1024x33 bits;
+ * Each Tx function has 64 bytes of space
+ */
+#define TX_STATS_RAM_BASE_OFFSET	0x00009000
+
+/**
+ * Catapult Rx Stats RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 1024x33 bits;
+ * Each Rx function has 64 bytes of space
+ */
+#define RX_STATS_RAM_BASE_OFFSET	0x00008000
+
+/* Catapult RXA Memory Access Page Numbers */
+#define RXA0_MEM_BLK_BASE_PG_NUM	0x0000008C
+#define RXA1_MEM_BLK_BASE_PG_NUM	0x0000008D
+
+/**
+ * Catapult Multicast Vector Table Base Offset Address
+ *
+ * Exists in RxA memory space.
+ * Organized as 512x65 bit block.
+ * However, 16 bytes (a power of 2) are allocated for each entry.
+ * Total size 512*16 bytes.
+ * There are two logical divisions, 256 entries each :
+ * a) Entries 0x00 to 0xff (256) -- Approx. MVT
+ *	Offset 0x000 to 0xFFF
+ * b) Entries 0x100 to 0x1ff (256) -- Exact MVT
+ *	Offsets 0x1000 to 0x1FFF
+ */
+#define MCAST_APPROX_MVT_BASE_OFFSET	0x00000000
+#define MCAST_EXACT_MVT_BASE_OFFSET	0x00001000
+
+/**
+ * Catapult RxQ Translate Table (RIT) Base Offset Address
+ *
+ * Exists in RxA memory space
+ * Total no. of entries 64
+ * Each entry is 1 (4 byte) word.
+ * 31:12 -- Reserved
+ * 11:0 -- Two 6 bit RxQ Ids
+ */
+#define FUNCTION_TO_RXQ_TRANSLATE	0x00002000
+
+/* Catapult RxAdm (RAD) Memory Access Page Numbers */
+#define RAD0_MEM_BLK_BASE_PG_NUM	0x00000086
+#define RAD1_MEM_BLK_BASE_PG_NUM	0x00000087
+
+/**
+ * Catapult RSS Table Base Offset Address
+ *
+ * Exists in RAD memory space.
+ * Each entry is 352 bits, but aligned on
+ * 64 byte (512 bit) boundary. Accessed as
+ * 4 byte words, the whole entry can be
+ * broken into 11 word accesses.
+ */
+#define RSS_TABLE_BASE_OFFSET	0x00000800
+
+/**
+ * Catapult CPQ Block Page Number
+ * This value is written to the page number registers
+ * to access the memory associated with the mailboxes.
+ */
+#define CPQ_BLK_PG_NUM	0x00000005
+
+/**
+ * Clarification :
+ * LL functions are 2 & 3; can HostFn0/HostFn1
+ * <-> LPU0/LPU1 memories be used ?
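+ * (For reference, bna_init() in bna.c maps pci_func < 3 to port 0
+ * and pci_func 3 to port 1.)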
+ */
+/**
+ * Catapult HostFn0/HostFn1 to LPU0/LPU1 Mbox memory
+ * Per catapult_spec.pdf, the offset of the mbox
+ * memory is in the register space at an offset of 0x200
+ */
+#define CPQ_BLK_REG_MBOX_ADDR	(CPQ_BLK_REG_ADDR + 0x200)
+
+#define HOSTFN_LPU_MBOX	(CPQ_BLK_REG_MBOX_ADDR + 0x000)
+
+/* Catapult LPU0/LPU1 to HostFn0/HostFn1 Mbox memory */
+#define LPU_HOSTFN_MBOX	(CPQ_BLK_REG_MBOX_ADDR + 0x080)
+
+/**
+ * Catapult HQM Block Page Number
+ * This is written to the page number register for
+ * the appropriate function to access the memory
+ * associated with HQM
+ */
+#define HQM0_BLK_PG_NUM	0x00000096
+#define HQM1_BLK_PG_NUM	0x00000097
+
+/**
+ * Note that TxQ and RxQ entries are interlaced in
+ * the HQM memory, i.e. RXQ0, TXQ0, RXQ1, TXQ1, etc.
+ */
+
+#define HQM_RXTX_Q_RAM_BASE_OFFSET	0x00004000
+
+/**
+ * CQ Memory
+ * Exists in HQM Memory space
+ * Each entry is 16 (4 byte) words of which
+ * only 12 words are used for configuration
+ * Total 64 entries per HQM memory space
+ */
+#define HQM_CQ_RAM_BASE_OFFSET	0x00006000
+
+/**
+ * Interrupt Block (IB) Memory
+ * Exists in HQM Memory space
+ * Each entry is 8 (4 byte) words of which
+ * only 5 words are used for configuration
+ * Total 128 entries per HQM memory space
+ */
+#define HQM_IB_RAM_BASE_OFFSET	0x00001000
+
+/**
+ * Index Table (IT) Memory
+ * Exists in HQM Memory space
+ * Each entry is 1 (4 byte) word which
+ * is used for configuration
+ * Total 128 entries per HQM memory space
+ */
+#define HQM_INDX_TBL_RAM_BASE_OFFSET	0x00002000
+
+/**
+ * PSS Block Memory Page Number
+ * This is written to the appropriate page number
+ * register to access the CPU memory.
+ * Also known as the PSS secondary memory (SMEM).
+ * Range : 0x180 to 0x1CF
+ * See catapult_spec.pdf for details
+ */
+#define PSS_BLK_PG_NUM	0x00000180
+
+/**
+ * Offsets of different instances of PSS SMEM
+ * 2.5M of continuous 1T memory space : 2 blocks
+ * of 1M each (32 pages each, page=32KB) and 4 smaller
+ * blocks of 128K each (4 pages each, page=32KB)
+ * PSS_LMEM_INST0 is used for firmware download
+ */
+#define PSS_LMEM_INST0	0x00000000
+#define PSS_LMEM_INST1	0x00100000
+#define PSS_LMEM_INST2	0x00200000
+#define PSS_LMEM_INST3	0x00220000
+#define PSS_LMEM_INST4	0x00240000
+#define PSS_LMEM_INST5	0x00260000
+
+#define BNA_PCI_REG_CT_ADDRSZ	(0x40000)
+
+#define BNA_GET_PAGE_NUM(_base_page, _offset) \
+	((_base_page) + ((_offset) >> 15))
+
+#define BNA_GET_PAGE_OFFSET(_offset) \
+	((_offset) & 0x7fff)
+
+#define BNA_GET_MEM_BASE_ADDR(_bar0, _base_offset) \
+	((_bar0) + HW_BLK_HOST_MEM_ADDR		\
+	+ BNA_GET_PAGE_OFFSET((_base_offset)))
+
+#define BNA_GET_VLAN_MEM_ENTRY_ADDR(_bar0, _fn_id, _vlan_id)\
+	(_bar0 + (HW_BLK_HOST_MEM_ADDR)	\
+	+ (BNA_GET_PAGE_OFFSET(VLAN_RAM_BASE_OFFSET))	\
+	+ (((_fn_id) & 0x3f) << 9)	\
+	+ (((_vlan_id) & 0xfe0) >> 3))
+
+/**
+ *
+ * Interrupt related bits, flags and macros
+ *
+ */
+
+#define __LPU02HOST_MBOX0_STATUS_BITS	0x00100000
+#define __LPU12HOST_MBOX0_STATUS_BITS	0x00200000
+#define __LPU02HOST_MBOX1_STATUS_BITS	0x00400000
+#define __LPU12HOST_MBOX1_STATUS_BITS	0x00800000
+
+#define __LPU02HOST_MBOX0_MASK_BITS	0x00100000
+#define __LPU12HOST_MBOX0_MASK_BITS	0x00200000
+#define __LPU02HOST_MBOX1_MASK_BITS	0x00400000
+#define __LPU12HOST_MBOX1_MASK_BITS	0x00800000
+
+#define __LPU2HOST_MBOX_MASK_BITS \
+	(__LPU02HOST_MBOX0_MASK_BITS | __LPU02HOST_MBOX1_MASK_BITS | \
+	__LPU12HOST_MBOX0_MASK_BITS | __LPU12HOST_MBOX1_MASK_BITS)
+
+#define __LPU2HOST_IB_STATUS_BITS	0x0000ffff
+
+#define
+#define BNA_IS_LPU0_MBOX_INTR(_intr_status) \
+ ((_intr_status) & (__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS))
+
+#define BNA_IS_LPU1_MBOX_INTR(_intr_status) \
+ ((_intr_status) & (__LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS))
+
+#define BNA_IS_MBOX_INTR(_intr_status) \
+ ((_intr_status) & \
+ (__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS | \
+ __LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS))
+
+#define __EMC_ERROR_STATUS_BITS 0x00010000
+#define __LPU0_ERROR_STATUS_BITS 0x00020000
+#define __LPU1_ERROR_STATUS_BITS 0x00040000
+#define __PSS_ERROR_STATUS_BITS 0x00080000
+
+#define __HALT_STATUS_BITS 0x01000000
+
+#define __EMC_ERROR_MASK_BITS 0x00010000
+#define __LPU0_ERROR_MASK_BITS 0x00020000
+#define __LPU1_ERROR_MASK_BITS 0x00040000
+#define __PSS_ERROR_MASK_BITS 0x00080000
+
+#define __HALT_MASK_BITS 0x01000000
+
+#define __ERROR_MASK_BITS \
+ (__EMC_ERROR_MASK_BITS | __LPU0_ERROR_MASK_BITS | \
+ __LPU1_ERROR_MASK_BITS | __PSS_ERROR_MASK_BITS | \
+ __HALT_MASK_BITS)
+
+#define BNA_IS_ERR_INTR(_intr_status) \
+ ((_intr_status) & \
+ (__EMC_ERROR_STATUS_BITS | \
+ __LPU0_ERROR_STATUS_BITS | \
+ __LPU1_ERROR_STATUS_BITS | \
+ __PSS_ERROR_STATUS_BITS | \
+ __HALT_STATUS_BITS))
+
+#define BNA_IS_MBOX_ERR_INTR(_intr_status) \
+ (BNA_IS_MBOX_INTR((_intr_status)) | \
+ BNA_IS_ERR_INTR((_intr_status)))
+
+#define BNA_IS_INTX_DATA_INTR(_intr_status) \
+ ((_intr_status) & __LPU2HOST_IB_STATUS_BITS)
+
+#define BNA_INTR_STATUS_MBOX_CLR(_intr_status) \
+do { \
+ (_intr_status) &= ~(__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS | \
+ __LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS); \
+} while (0)
+
+#define BNA_INTR_STATUS_ERR_CLR(_intr_status) \
+do { \
+ (_intr_status) &= ~(__EMC_ERROR_STATUS_BITS | \
+ __LPU0_ERROR_STATUS_BITS | \
+ __LPU1_ERROR_STATUS_BITS | \
+ __PSS_ERROR_STATUS_BITS | \
+ __HALT_STATUS_BITS); \
+} while (0)
+
+#define bna_intx_disable(_bna, _cur_mask) \
+{ \
+ (_cur_mask) = readl((_bna)->regs.fn_int_mask);\
+ writel(0xffffffff, (_bna)->regs.fn_int_mask);\
+}
+
+#define bna_intx_enable(bna, new_mask) \
+ writel((new_mask), (bna)->regs.fn_int_mask)
+
+#define bna_mbox_intr_disable(bna) \
+ writel((readl((bna)->regs.fn_int_mask) | \
+ (__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_mbox_intr_enable(bna) \
+ writel((readl((bna)->regs.fn_int_mask) & \
+ ~(__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_intr_status_get(_bna, _status) \
+{ \
+ (_status) = readl((_bna)->regs.fn_int_status); \
+ if ((_status)) { \
+ writel((_status) & ~(__LPU02HOST_MBOX0_STATUS_BITS |\
+ __LPU02HOST_MBOX1_STATUS_BITS |\
+ __LPU12HOST_MBOX0_STATUS_BITS |\
+ __LPU12HOST_MBOX1_STATUS_BITS), \
+ (_bna)->regs.fn_int_status);\
+ } \
+}
+
+#define bna_intr_status_get_no_clr(_bna, _status) \
+ (_status) = readl((_bna)->regs.fn_int_status)
+
+#define bna_intr_mask_get(bna, mask) \
+ (*mask) = readl((bna)->regs.fn_int_mask)
+
+#define bna_intr_ack(bna, intr_bmap) \
+ writel((intr_bmap), (bna)->regs.fn_int_status)
+
+#define bna_ib_intx_disable(bna, ib_id) \
+ writel(readl((bna)->regs.fn_int_mask) | \
+ (1 << (ib_id)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_ib_intx_enable(bna, ib_id) \
+ writel(readl((bna)->regs.fn_int_mask) & \
+ ~(1 << (ib_id)), \
+ (bna)->regs.fn_int_mask)
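/*
 * Illustrative sketch (editorial annotation, not part of this patch) of
 * how an INTx interrupt handler might use the helpers above; the
 * handler name and per-device pointer are hypothetical, and
 * irqreturn_t assumes <linux/interrupt.h>.  Note that
 * bna_intr_status_get() reads fn_int_status and acks every source
 * except the mailbox bits, which are cleared only after the mailbox
 * has been serviced.
 */
static irqreturn_t example_isr(int irq, void *data)
{
	struct bna *bna = data;		/* hypothetical device context */
	u32 status;

	bna_intr_status_get(bna, status);
	if (!status)
		return IRQ_NONE;	/* shared line, not our interrupt */

	if (BNA_IS_MBOX_ERR_INTR(status)) {
		/* mask mailbox/error sources while they are serviced */
		bna_mbox_intr_disable(bna);
		BNA_INTR_STATUS_MBOX_CLR(status);
	}

	if (BNA_IS_INTX_DATA_INTR(status)) {
		/* IB (data path) events, already acked by the macro
		 * above: typically schedule NAPI polling here.
		 */
	}

	return IRQ_HANDLED;
}
+
+#define bna_mbox_msix_idx_set(_device) \
+do {\
+ writel(((_device)->vector & 0x000001FF), \
+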
(_device)->bna->pcidev.pci_bar_kva + \ + reg_offset[(_device)->bna->pcidev.pci_func].msix_idx);\ +} while (0) + +/** + * + * TxQ, RxQ, CQ related bits, offsets, macros + * + */ + +#define BNA_Q_IDLE_STATE 0x00008001 + +#define BNA_GET_DOORBELL_BASE_ADDR(_bar0) \ + ((_bar0) + HQM_DOORBELL_BLK_BASE_ADDR) + +#define BNA_GET_DOORBELL_ENTRY_OFFSET(_entry) \ + ((HQM_DOORBELL_BLK_BASE_ADDR) \ + + (_entry << 7)) + +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) + +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* TxQ Entry Opcodes */ +#define BNA_TXQ_WI_SEND (0x402) /* Single Frame Transmission */ +#define BNA_TXQ_WI_SEND_LSO (0x403) /* Multi-Frame Transmission */ +#define BNA_TXQ_WI_EXTENSION (0x104) /* Extension WI */ + +/* TxQ Entry Control Flags */ +#define BNA_TXQ_WI_CF_FCOE_CRC (1 << 8) +#define BNA_TXQ_WI_CF_IPID_MODE (1 << 5) +#define BNA_TXQ_WI_CF_INS_PRIO (1 << 4) +#define BNA_TXQ_WI_CF_INS_VLAN (1 << 3) +#define BNA_TXQ_WI_CF_UDP_CKSUM (1 << 2) +#define BNA_TXQ_WI_CF_TCP_CKSUM (1 << 1) +#define BNA_TXQ_WI_CF_IP_CKSUM (1 << 0) + +#define BNA_TXQ_WI_L4_HDR_N_OFFSET(_hdr_size, _offset) \ + (((_hdr_size) << 10) | ((_offset) & 0x3FF)) + +/* + * Completion Q defines + */ +/* CQ Entry Flags */ +#define BNA_CQ_EF_MAC_ERROR (1 << 0) +#define BNA_CQ_EF_FCS_ERROR (1 << 1) +#define BNA_CQ_EF_TOO_LONG (1 << 2) +#define BNA_CQ_EF_FC_CRC_OK (1 << 3) + +#define BNA_CQ_EF_RSVD1 (1 << 4) +#define BNA_CQ_EF_L4_CKSUM_OK (1 << 5) +#define BNA_CQ_EF_L3_CKSUM_OK (1 << 6) +#define BNA_CQ_EF_HDS_HEADER (1 << 7) + +#define BNA_CQ_EF_UDP (1 << 8) +#define BNA_CQ_EF_TCP (1 << 9) +#define BNA_CQ_EF_IP_OPTIONS (1 << 10) +#define BNA_CQ_EF_IPV6 (1 << 11) + +#define BNA_CQ_EF_IPV4 (1 << 12) +#define BNA_CQ_EF_VLAN (1 << 13) +#define BNA_CQ_EF_RSS (1 << 14) +#define BNA_CQ_EF_RSVD2 (1 << 15) + +#define BNA_CQ_EF_MCAST_MATCH (1 << 16) +#define BNA_CQ_EF_MCAST (1 << 17) +#define BNA_CQ_EF_BCAST (1 << 18) +#define BNA_CQ_EF_REMOTE (1 << 19) + +#define BNA_CQ_EF_LOCAL (1 << 20) + +/** + * + * Data structures + * + */ + +enum txf_flags { + BFI_TXF_CF_ENABLE = 1 << 0, + BFI_TXF_CF_VLAN_FILTER = 1 << 8, + BFI_TXF_CF_VLAN_ADMIT = 1 << 9, + BFI_TXF_CF_VLAN_INSERT = 1 << 10, + BFI_TXF_CF_RSVD1 = 1 << 11, + BFI_TXF_CF_MAC_SA_CHECK = 1 << 12, + BFI_TXF_CF_VLAN_WI_BASED = 1 << 13, + BFI_TXF_CF_VSWITCH_MCAST = 1 << 14, + BFI_TXF_CF_VSWITCH_UCAST = 1 << 15, + BFI_TXF_CF_RSVD2 = 0x7F << 1 +}; + +enum ib_flags { + BFI_IB_CF_MASTER_ENABLE = (1 << 0), + BFI_IB_CF_MSIX_MODE = (1 << 1), + BFI_IB_CF_COALESCING_MODE = (1 << 2), + BFI_IB_CF_INTER_PKT_ENABLE = (1 << 3), + BFI_IB_CF_INT_ENABLE = (1 << 4), + BFI_IB_CF_INTER_PKT_DMA = (1 << 5), + BFI_IB_CF_ACK_PENDING = (1 << 6), + BFI_IB_CF_RESERVED1 = (1 << 7) +}; + +enum rss_hash_type { + BFI_RSS_T_V4_TCP = (1 << 11), + BFI_RSS_T_V4_IP = (1 << 10), + BFI_RSS_T_V6_TCP = (1 << 9), + BFI_RSS_T_V6_IP = (1 << 8) +}; +enum hds_header_type { + BNA_HDS_T_V4_TCP = (1 << 11), + BNA_HDS_T_V4_UDP = (1 << 10), + BNA_HDS_T_V6_TCP = (1 << 9), + BNA_HDS_T_V6_UDP = (1 << 8), + BNA_HDS_FORCED = (1 << 7), +}; +enum rxf_flags { + BNA_RXF_CF_SM_LG_RXQ = (1 << 15), + BNA_RXF_CF_DEFAULT_VLAN = (1 << 14), + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE = (1 << 13), + BNA_RXF_CF_VLAN_STRIP = (1 << 12), + BNA_RXF_CF_RSS_ENABLE = (1 << 8) +}; +struct bna_chip_regs_offset { + u32 page_addr; + u32 fn_int_status; + u32 fn_int_mask; + u32 msix_idx; +}; +extern const struct bna_chip_regs_offset reg_offset[]; + +struct bna_chip_regs { + void __iomem *page_addr; + void __iomem 
*fn_int_status; + void __iomem *fn_int_mask; +}; + +struct bna_txq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 cns_ptr2_n_q_state; /* 31:16->cons. ptr 2; 15:0-> Q state */ + u32 nxt_qid_n_fid_n_pri; /* 17:10->next */ + /* QId;9:3->FID;2:0->Priority */ + u32 wvc_n_cquota_n_rquota; /* 31:24->WI Vector Count; */ + /* 23:12->Cfg Quota; */ + /* 11:0 ->Run Quota */ + u32 reserved3[4]; +}; + +struct bna_rxq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 sg_n_cq_n_cns_ptr; /* 31:28->reserved; 27:24->sg count */ + /* 23:16->CQ; */ + /* 15:0->consumer pointer(index?) */ + u32 buf_sz_n_q_state; /* 31:16->buffer size; 15:0-> Q state */ + u32 next_qid; /* 17:10->next QId */ + u32 reserved3; + u32 reserved4[4]; +}; + +struct bna_rxtx_q_mem { + struct bna_rxq_mem rxq; + struct bna_txq_mem txq; +}; + +struct bna_cq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 q_state; /* 31:16->reserved; 15:0-> Q state */ + u32 reserved3[2]; + u32 reserved4[4]; +}; + +struct bna_ib_blk_mem { + u32 host_addr_lo; + u32 host_addr_hi; + u32 clsc_n_ctrl_n_msix; /* 31:24->coalescing; */ + /* 23:16->coalescing cfg; */ + /* 15:8 ->control; */ + /* 7:0 ->msix; */ + u32 ipkt_n_ent_n_idxof; + u32 ipkt_cnt_cfg_n_unacked; + + u32 reserved[3]; +}; + +struct bna_idx_tbl_mem { + u32 idx; /* !< 31:16->res;15:0->idx; */ +}; + +struct bna_doorbell_qset { + u32 rxq[0x20 >> 2]; + u32 txq[0x20 >> 2]; + u32 ib0[0x20 >> 2]; + u32 ib1[0x20 >> 2]; +}; + +struct bna_rx_fndb_ram { + u32 rss_prop; + u32 size_routing_props; + u32 rit_hds_mcastq; + u32 control_flags; +}; + +struct bna_tx_fndb_ram { + u32 vlan_n_ctrl_flags; +}; + +/** + * @brief + * Structure which maps to RxFn Indirection Table (RIT) + * Size : 1 word + * See catapult_spec.pdf, RxA for details + */ +struct bna_rit_mem { + u32 rxq_ids; /* !< 31:12->res;11:0->two 6 bit RxQ Ids */ +}; + +/** + * @brief + * Structure which maps to RSS Table entry + * Size : 16 words + * See catapult_spec.pdf, RAD for details + */ +struct bna_rss_mem { + /* + * 31:12-> res + * 11:8 -> protocol type + * 7:0 -> hash index + */ + u32 type_n_hash; + u32 hash_key[10]; /* !< 40 byte Toeplitz hash key */ + u32 reserved[5]; +}; + +/* TxQ Vector (a.k.a. 
Tx-Buffer Descriptor) */
+struct bna_dma_addr {
+ u32 msb;
+ u32 lsb;
+};
+
+struct bna_txq_wi_vector {
+ u16 reserved;
+ u16 length; /* Only 14 LSB are valid */
+ struct bna_dma_addr host_addr; /* Tx-Buf DMA addr */
+};
+
+typedef u16 bna_txq_wi_opcode_t;
+
+typedef u16 bna_txq_wi_ctrl_flag_t;
+
+/**
+ * TxQ Entry Structure
+ *
+ * BEWARE: Load values into this structure with correct endianness.
+ */
+struct bna_txq_entry {
+ union {
+ struct {
+ u8 reserved;
+ u8 num_vectors; /* number of vectors present */
+ bna_txq_wi_opcode_t opcode; /* Either */
+ /* BNA_TXQ_WI_SEND or */
+ /* BNA_TXQ_WI_SEND_LSO */
+ bna_txq_wi_ctrl_flag_t flags; /* OR of all the flags */
+ u16 l4_hdr_size_n_offset;
+ u16 vlan_tag;
+ u16 lso_mss; /* Only 14 LSB are valid */
+ u32 frame_length; /* Only 24 LSB are valid */
+ } wi;
+
+ struct {
+ u16 reserved;
+ bna_txq_wi_opcode_t opcode; /* Must be */
+ /* BNA_TXQ_WI_EXTENSION */
+ u32 reserved2[3]; /* Place holder for */
+ /* removed vector (12 bytes) */
+ } wi_ext;
+ } hdr;
+ struct bna_txq_wi_vector vector[4];
+};
+#define wi_hdr hdr.wi
+#define wi_ext_hdr hdr.wi_ext
+
+/* RxQ Entry Structure */
+struct bna_rxq_entry { /* Rx-Buffer */
+ struct bna_dma_addr host_addr; /* Rx-Buffer DMA address */
+};
+
+typedef u32 bna_cq_e_flag_t;
+
+/* CQ Entry Structure */
+struct bna_cq_entry {
+ bna_cq_e_flag_t flags;
+ u16 vlan_tag;
+ u16 length;
+ u32 rss_hash;
+ u8 valid;
+ u8 reserved1;
+ u8 reserved2;
+ u8 rxq_id;
+};
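/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * filling a TxQ work item for a single-frame send, honouring the
 * BEWARE note above by loading multi-byte fields in big-endian form.
 * The DMA address, lengths and L4 header placement are hypothetical
 * example values.
 */
static inline void example_fill_send_wi(struct bna_txq_entry *wi,
					u64 dma, u16 len, u32 frame_len)
{
	wi->wi_hdr.reserved = 0;
	wi->wi_hdr.num_vectors = 1;		/* one buffer fragment */
	wi->wi_hdr.opcode = htons(BNA_TXQ_WI_SEND);
	wi->wi_hdr.flags = htons(BNA_TXQ_WI_CF_IP_CKSUM |
				 BNA_TXQ_WI_CF_TCP_CKSUM);
	/* e.g. a 5-word L4 header starting at byte offset 34 */
	wi->wi_hdr.l4_hdr_size_n_offset =
		htons(BNA_TXQ_WI_L4_HDR_N_OFFSET(5, 34));
	wi->wi_hdr.vlan_tag = 0;
	wi->wi_hdr.lso_mss = 0;			/* not an LSO send */
	wi->wi_hdr.frame_length = htonl(frame_len); /* 24 LSB valid */

	wi->vector[0].reserved = 0;
	wi->vector[0].length = htons(len);	/* 14 LSB valid */
	wi->vector[0].host_addr.msb = htonl(upper_32_bits(dma));
	wi->vector[0].host_addr.lsb = htonl(lower_32_bits(dma));
}
+
+#endif /* __BNA_HW_H__ */
diff --git a/drivers/net/bna/bna_txrx.c b/drivers/net/bna/bna_txrx.c
new file mode 100644
index 000000000000..890846d55502
--- /dev/null
+++ b/drivers/net/bna/bna_txrx.c
@@ -0,0 +1,4209 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.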
+ * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfi.h" + +/** + * IB + */ +#define bna_ib_find_free_ibidx(_mask, _pos)\ +do {\ + (_pos) = 0;\ + while (((_pos) < (BFI_IBIDX_MAX_SEGSIZE)) &&\ + ((1 << (_pos)) & (_mask)))\ + (_pos)++;\ +} while (0) + +#define bna_ib_count_ibidx(_mask, _count)\ +do {\ + int pos = 0;\ + (_count) = 0;\ + while (pos < (BFI_IBIDX_MAX_SEGSIZE)) {\ + if ((1 << pos) & (_mask))\ + (_count) = pos + 1;\ + pos++;\ + } \ +} while (0) + +#define bna_ib_select_segpool(_count, _q_idx)\ +do {\ + int i;\ + (_q_idx) = -1;\ + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) {\ + if ((_count <= ibidx_pool[i].pool_entry_size)) {\ + (_q_idx) = i;\ + break;\ + } \ + } \ +} while (0) + +struct bna_ibidx_pool { + int pool_size; + int pool_entry_size; +}; +init_ibidx_pool(ibidx_pool); + +static struct bna_intr * +bna_intr_get(struct bna_ib_mod *ib_mod, enum bna_intr_type intr_type, + int vector) +{ + struct bna_intr *intr; + struct list_head *qe; + + list_for_each(qe, &ib_mod->intr_active_q) { + intr = (struct bna_intr *)qe; + + if ((intr->intr_type == intr_type) && + (intr->vector == vector)) { + intr->ref_count++; + return intr; + } + } + + if (list_empty(&ib_mod->intr_free_q)) + return NULL; + + bfa_q_deq(&ib_mod->intr_free_q, &intr); + bfa_q_qe_init(&intr->qe); + + intr->ref_count = 1; + intr->intr_type = intr_type; + intr->vector = vector; + + list_add_tail(&intr->qe, &ib_mod->intr_active_q); + + return intr; +} + +static void +bna_intr_put(struct bna_ib_mod *ib_mod, + struct bna_intr *intr) +{ + intr->ref_count--; + + if (intr->ref_count == 0) { + intr->ib = NULL; + list_del(&intr->qe); + bfa_q_qe_init(&intr->qe); + list_add_tail(&intr->qe, &ib_mod->intr_free_q); + } +} + +void +bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + u8 offset; + struct bna_doorbell_qset *qset; + unsigned long off; + + ib_mod->bna = bna; + + ib_mod->ib = (struct bna_ib *) + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->intr = (struct bna_intr *) + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->idx_seg = (struct bna_ibidx_seg *) + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ib_mod->ib_free_q); + INIT_LIST_HEAD(&ib_mod->intr_free_q); + INIT_LIST_HEAD(&ib_mod->intr_active_q); + + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) + INIT_LIST_HEAD(&ib_mod->ibidx_seg_pool[i]); + + for (i = 0; i < BFI_MAX_IB; i++) { + ib_mod->ib[i].ib_id = i; + + ib_mod->ib[i].ib_seg_host_addr_kva = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].kva; + ib_mod->ib[i].ib_seg_host_addr.lsb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.lsb; + ib_mod->ib[i].ib_seg_host_addr.msb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.msb; + + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)(&qset[i >> 1].ib0[(i & 0x1) + * (0x20 >> 2)]); + ib_mod->ib[i].door_bell.doorbell_addr = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + + bfa_q_qe_init(&ib_mod->ib[i].qe); + list_add_tail(&ib_mod->ib[i].qe, &ib_mod->ib_free_q); + + bfa_q_qe_init(&ib_mod->intr[i].qe); + list_add_tail(&ib_mod->intr[i].qe, &ib_mod->intr_free_q); + } + + count = 0; + offset = 0; + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) { + for (j = 0; j < ibidx_pool[i].pool_size; j++) { + bfa_q_qe_init(&ib_mod->idx_seg[count]); + ib_mod->idx_seg[count].ib_seg_size = + ibidx_pool[i].pool_entry_size; + 
ib_mod->idx_seg[count].ib_idx_tbl_offset = offset;
+ list_add_tail(&ib_mod->idx_seg[count].qe,
+ &ib_mod->ibidx_seg_pool[i]);
+ count++;
+ offset += ibidx_pool[i].pool_entry_size;
+ }
+ }
+}
+
+void
+bna_ib_mod_uninit(struct bna_ib_mod *ib_mod)
+{
+ int i;
+ int j;
+ struct list_head *qe;
+
+ i = 0;
+ list_for_each(qe, &ib_mod->ib_free_q)
+ i++;
+
+ i = 0;
+ list_for_each(qe, &ib_mod->intr_free_q)
+ i++;
+
+ for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) {
+ j = 0;
+ list_for_each(qe, &ib_mod->ibidx_seg_pool[i])
+ j++;
+ }
+
+ ib_mod->bna = NULL;
+}
+
+struct bna_ib *
+bna_ib_get(struct bna_ib_mod *ib_mod,
+ enum bna_intr_type intr_type,
+ int vector)
+{
+ struct bna_ib *ib;
+ struct bna_intr *intr;
+
+ if (intr_type == BNA_INTR_T_INTX)
+ vector = (1 << vector);
+
+ intr = bna_intr_get(ib_mod, intr_type, vector);
+ if (intr == NULL)
+ return NULL;
+
+ if (intr->ib) {
+ if (intr->ib->ref_count == BFI_IBIDX_MAX_SEGSIZE) {
+ bna_intr_put(ib_mod, intr);
+ return NULL;
+ }
+ intr->ib->ref_count++;
+ return intr->ib;
+ }
+
+ if (list_empty(&ib_mod->ib_free_q)) {
+ bna_intr_put(ib_mod, intr);
+ return NULL;
+ }
+
+ bfa_q_deq(&ib_mod->ib_free_q, &ib);
+ bfa_q_qe_init(&ib->qe);
+
+ ib->ref_count = 1;
+ ib->start_count = 0;
+ ib->idx_mask = 0;
+
+ ib->intr = intr;
+ ib->idx_seg = NULL;
+ intr->ib = ib;
+
+ ib->bna = ib_mod->bna;
+
+ return ib;
+}
+
+void
+bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib)
+{
+ bna_intr_put(ib_mod, ib->intr);
+
+ ib->ref_count--;
+
+ if (ib->ref_count == 0) {
+ ib->intr = NULL;
+ ib->bna = NULL;
+ list_add_tail(&ib->qe, &ib_mod->ib_free_q);
+ }
+}
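/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * what the ibidx bitmask helpers defined near the top of this file
 * compute.  With idx_mask = 0x16 (bits 1, 2 and 4 in use),
 * bna_ib_find_free_ibidx() yields 0 (the first clear bit) and
 * bna_ib_count_ibidx() yields 5 (highest set bit + 1), i.e. the
 * segment size needed to cover every index currently held.
 */
static void example_ibidx_mask_usage(void)
{
	u32 mask = 0x16;	/* index slots 1, 2 and 4 in use */
	int free_pos, count;

	bna_ib_find_free_ibidx(mask, free_pos);	/* free_pos == 0 */
	bna_ib_count_ibidx(mask, count);	/* count == 5 */
}
+
+/* Returns index offset - starting from 0 */
+int
+bna_ib_reserve_idx(struct bna_ib *ib)
+{
+ struct bna_ib_mod *ib_mod = &ib->bna->ib_mod;
+ struct bna_ibidx_seg *idx_seg;
+ int idx;
+ int num_idx;
+ int q_idx;
+
+ /* Find the first free index position */
+ bna_ib_find_free_ibidx(ib->idx_mask, idx);
+ if (idx == BFI_IBIDX_MAX_SEGSIZE)
+ return -1;
+
+ /*
+ * Calculate the total number of indexes held by this IB,
+ * including the index newly reserved above.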
+ */ + bna_ib_count_ibidx((ib->idx_mask | (1 << idx)), num_idx); + + /* See if there is a free space in the index segment held by this IB */ + if (ib->idx_seg && (num_idx <= ib->idx_seg->ib_seg_size)) { + ib->idx_mask |= (1 << idx); + return idx; + } + + if (ib->start_count) + return -1; + + /* Allocate a new segment */ + bna_ib_select_segpool(num_idx, q_idx); + while (1) { + if (q_idx == BFI_IBIDX_TOTAL_POOLS) + return -1; + if (!list_empty(&ib_mod->ibidx_seg_pool[q_idx])) + break; + q_idx++; + } + bfa_q_deq(&ib_mod->ibidx_seg_pool[q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + + /* Free the old segment */ + if (ib->idx_seg) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, q_idx); + list_add_tail(&ib->idx_seg->qe, &ib_mod->ibidx_seg_pool[q_idx]); + } + + ib->idx_seg = idx_seg; + + ib->idx_mask |= (1 << idx); + + return idx; +} + +void +bna_ib_release_idx(struct bna_ib *ib, int idx) +{ + struct bna_ib_mod *ib_mod = &ib->bna->ib_mod; + struct bna_ibidx_seg *idx_seg; + int num_idx; + int cur_q_idx; + int new_q_idx; + + ib->idx_mask &= ~(1 << idx); + + if (ib->start_count) + return; + + bna_ib_count_ibidx(ib->idx_mask, num_idx); + + /* + * Free the segment, if there are no more indexes in the segment + * held by this IB + */ + if (!num_idx) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = NULL; + return; + } + + /* See if we can move to a smaller segment */ + bna_ib_select_segpool(num_idx, new_q_idx); + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + while (new_q_idx < cur_q_idx) { + if (!list_empty(&ib_mod->ibidx_seg_pool[new_q_idx])) + break; + new_q_idx++; + } + if (new_q_idx < cur_q_idx) { + /* Select the new smaller segment */ + bfa_q_deq(&ib_mod->ibidx_seg_pool[new_q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + /* Free the old segment */ + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = idx_seg; + } +} + +int +bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config) +{ + if (ib->start_count) + return -1; + + ib->ib_config.coalescing_timeo = ib_config->coalescing_timeo; + ib->ib_config.interpkt_timeo = ib_config->interpkt_timeo; + ib->ib_config.interpkt_count = ib_config->interpkt_count; + ib->ib_config.ctrl_flags = ib_config->ctrl_flags; + + ib->ib_config.ctrl_flags |= BFI_IB_CF_MASTER_ENABLE; + if (ib->intr->intr_type == BNA_INTR_T_MSIX) + ib->ib_config.ctrl_flags |= BFI_IB_CF_MSIX_MODE; + + return 0; +} + +void +bna_ib_start(struct bna_ib *ib) +{ + struct bna_ib_blk_mem ib_cfg; + struct bna_ib_blk_mem *ib_mem; + u32 pg_num; + u32 intx_mask; + int i; + void __iomem *base_addr; + unsigned long off; + + ib->start_count++; + + if (ib->start_count > 1) + return; + + ib_cfg.host_addr_lo = (u32)(ib->ib_seg_host_addr.lsb); + ib_cfg.host_addr_hi = (u32)(ib->ib_seg_host_addr.msb); + + ib_cfg.clsc_n_ctrl_n_msix = (((u32) + ib->ib_config.coalescing_timeo << 16) | + ((u32)ib->ib_config.ctrl_flags << 8) | + (ib->intr->vector)); + ib_cfg.ipkt_n_ent_n_idxof = + ((u32) + (ib->ib_config.interpkt_timeo & 0xf) << 16) | + ((u32)ib->idx_seg->ib_seg_size << 8) | + (ib->idx_seg->ib_idx_tbl_offset); + ib_cfg.ipkt_cnt_cfg_n_unacked = ((u32) + ib->ib_config.interpkt_count << 24); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_IB_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_IB_RAM_BASE_OFFSET); + + ib_mem = (struct 
bna_ib_blk_mem *)0; + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_lo; + writel(htonl(ib_cfg.host_addr_lo), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_hi; + writel(htonl(ib_cfg.host_addr_hi), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].clsc_n_ctrl_n_msix; + writel(ib_cfg.clsc_n_ctrl_n_msix, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_n_ent_n_idxof; + writel(ib_cfg.ipkt_n_ent_n_idxof, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_cnt_cfg_n_unacked; + writel(ib_cfg.ipkt_cnt_cfg_n_unacked, base_addr + off); + + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_INDX_TBL_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_INDX_TBL_RAM_BASE_OFFSET); + for (i = 0; i < ib->idx_seg->ib_seg_size; i++) { + off = (unsigned long) + ((ib->idx_seg->ib_idx_tbl_offset + i) * BFI_IBIDX_SIZE); + writel(0, base_addr + off); + } + + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask &= ~(ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } +} + +void +bna_ib_stop(struct bna_ib *ib) +{ + u32 intx_mask; + + ib->start_count--; + + if (ib->start_count == 0) { + writel(BNA_DOORBELL_IB_INT_DISABLE, + ib->door_bell.doorbell_addr); + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask |= (ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } + } +} + +void +bna_ib_fail(struct bna_ib *ib) +{ + ib->start_count = 0; +} + +/** + * RXF + */ +static void rxf_enable(struct bna_rxf *rxf); +static void rxf_disable(struct bna_rxf *rxf); +static void __rxf_config_set(struct bna_rxf *rxf); +static void __rxf_rit_set(struct bna_rxf *rxf); +static void __bna_rxf_stat_clr(struct bna_rxf *rxf); +static int rxf_process_packet_filter(struct bna_rxf *rxf); +static int rxf_clear_packet_filter(struct bna_rxf *rxf); +static void rxf_reset_packet_filter(struct bna_rxf *rxf); +static void rxf_cb_enabled(void *arg, int status); +static void rxf_cb_disabled(void *arg, int status); +static void bna_rxf_cb_stats_cleared(void *arg, int status); +static void __rxf_enable(struct bna_rxf *rxf); +static void __rxf_disable(struct bna_rxf *rxf); + +bfa_fsm_state_decl(bna_rxf, stopped, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, start_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_mod_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, started, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_clr_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stop_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, pause_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, resume_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stat_clr_wait, struct bna_rxf, + enum bna_rxf_event); + +static struct bfa_sm_table rxf_sm_table[] = { + {BFA_SM(bna_rxf_sm_stopped), BNA_RXF_STOPPED}, + {BFA_SM(bna_rxf_sm_start_wait), BNA_RXF_START_WAIT}, + {BFA_SM(bna_rxf_sm_cam_fltr_mod_wait), BNA_RXF_CAM_FLTR_MOD_WAIT}, + {BFA_SM(bna_rxf_sm_started), BNA_RXF_STARTED}, + {BFA_SM(bna_rxf_sm_cam_fltr_clr_wait), BNA_RXF_CAM_FLTR_CLR_WAIT}, + {BFA_SM(bna_rxf_sm_stop_wait), BNA_RXF_STOP_WAIT}, + 
{BFA_SM(bna_rxf_sm_pause_wait), BNA_RXF_PAUSE_WAIT}, + {BFA_SM(bna_rxf_sm_resume_wait), BNA_RXF_RESUME_WAIT}, + {BFA_SM(bna_rxf_sm_stat_clr_wait), BNA_RXF_STAT_CLR_WAIT} +}; + +static void +bna_rxf_sm_stopped_entry(struct bna_rxf *rxf) +{ + call_rxf_stop_cbfn(rxf, BNA_CB_SUCCESS); +} + +static void +bna_rxf_sm_stopped(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_START: + bfa_fsm_set_state(rxf, bna_rxf_sm_start_wait); + break; + + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_FAIL: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_MOD: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_STARTED: + case RXF_E_STOPPED: + case RXF_E_CAM_FLTR_RESP: + /** + * These events are received due to flushing of mbox + * when device fails + */ + /* No-op */ + break; + + case RXF_E_PAUSE: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED; + call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_RESUME: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING; + call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_start_wait_entry(struct bna_rxf *rxf) +{ + __rxf_config_set(rxf); + __rxf_rit_set(rxf); + rxf_enable(rxf); +} + +static void +bna_rxf_sm_start_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + break; + + case RXF_E_FAIL: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_STARTED: + /** + * Force rxf_process_filter() to go through initial + * config + */ + if ((rxf->ucast_active_mac != NULL) && + (rxf->ucast_pending_set == 0)) + rxf->ucast_pending_set = 1; + + if (rxf->rss_status == BNA_STATUS_T_ENABLED) + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait_entry(struct bna_rxf *rxf) +{ + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. 
When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_started_entry(struct bna_rxf *rxf) +{ + call_rxf_start_cbfn(rxf, BNA_CB_SUCCESS); + + if (rxf->rxf_flags & BNA_RXF_FL_OPERSTATE_CHANGED) { + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + else + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } + +} + +static void +bna_rxf_sm_started(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + /* Hack to get FSM start clearing CAM entries */ + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + bfa_fsm_set_state(rxf, bna_rxf_sm_pause_wait); + break; + + case RXF_E_RESUME: + bfa_fsm_set_state(rxf, bna_rxf_sm_resume_wait); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_clr_wait_entry(struct bna_rxf *rxf) +{ + /** + * Note: Do not add rxf_clear_packet_filter here. + * It will overstep mbox when this transition happens: + * cam_fltr_mod_wait -> cam_fltr_clr_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_cam_fltr_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_clear_packet_filter(rxf)) { + /* No more pending CAM entries to clear */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + rxf_disable(rxf); + } + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_stop_wait_entry(struct bna_rxf *rxf) +{ + /** + * NOTE: Do not add rxf_disable here. + * It will overstep mbox when this transition happens: + * start_wait -> stop_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_stop_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_STARTED: + /** + * This event is received due to abrupt transition from + * bna_rxf_sm_start_wait state on receiving + * RXF_E_STOP event + */ + rxf_disable(rxf); + break; + + case RXF_E_STOPPED: + /** + * FSM was in the process of stopping, initiated by + * bnad. 
When this happens, no one can be waiting for
+ * start or filter update
+ */
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stat_clr_wait);
+ break;
+
+ case RXF_E_PAUSE:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED;
+ break;
+
+ case RXF_E_RESUME:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING;
+ break;
+
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_pause_wait_entry(struct bna_rxf *rxf)
+{
+ rxf->rxf_flags &=
+ ~(BNA_RXF_FL_OPERSTATE_CHANGED | BNA_RXF_FL_RXF_ENABLED);
+ __rxf_disable(rxf);
+}
+
+static void
+bna_rxf_sm_pause_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ /**
+ * FSM was in the process of disabling rxf, initiated by
+ * bnad.
+ */
+ call_rxf_pause_cbfn(rxf, BNA_CB_FAIL);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ case RXF_E_STOPPED:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED;
+ call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_started);
+ break;
+
+ /*
+ * Since PAUSE/RESUME can only be sent by bnad, we don't expect
+ * any other event during these states
+ */
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_resume_wait_entry(struct bna_rxf *rxf)
+{
+ rxf->rxf_flags &= ~(BNA_RXF_FL_OPERSTATE_CHANGED);
+ rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED;
+ __rxf_enable(rxf);
+}
+
+static void
+bna_rxf_sm_resume_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ /**
+ * FSM was in the process of disabling rxf, initiated by
+ * bnad.
+ */
+ call_rxf_resume_cbfn(rxf, BNA_CB_FAIL);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ case RXF_E_STARTED:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING;
+ call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_started);
+ break;
+
+ /*
+ * Since PAUSE/RESUME can only be sent by bnad, we don't expect
+ * any other event during these states
+ */
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_stat_clr_wait_entry(struct bna_rxf *rxf)
+{
+ __bna_rxf_stat_clr(rxf);
+}
+
+static void
+bna_rxf_sm_stat_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ case RXF_E_STAT_CLEARED:
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+__rxf_enable(struct bna_rxf *rxf)
+{
+ struct bfi_ll_rxf_multi_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0);
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+ ll_req.enable = 1;
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ rxf_cb_enabled, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static void
+__rxf_disable(struct bna_rxf *rxf)
+{
+ struct bfi_ll_rxf_multi_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0);
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+ ll_req.enable = 0;
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ rxf_cb_disabled, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
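/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * __rxf_enable()/__rxf_disable() above address up to 64 RxFs through a
 * pair of 32-bit mask words; a hypothetical helper showing the split:
 */
static void example_rxf_id_to_mask(int rxf_id, u32 bm[2])
{
	bm[0] = bm[1] = 0;
	if (rxf_id < 32)
		bm[0] = 1 << rxf_id;		/* ids 0..31 -> word 0 */
	else
		bm[1] = 1 << (rxf_id - 32);	/* ids 32..63 -> word 1 */
	/* the words go to the LPU in network order: htonl(bm[0]), ... */
}
+
+static void
+__rxf_config_set(struct bna_rxf *rxf)
+{
+ u32 i;
+ struct bna_rss_mem *rss_mem;
+ struct bna_rx_fndb_ram *rx_fndb_ram;
+ struct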
bna *bna = rxf->rx->bna; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RSS_TABLE_BASE_OFFSET); + + rss_mem = (struct bna_rss_mem *)0; + + /* Configure RSS if required */ + if (rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE) { + /* configure RSS Table */ + writel(BNA_GET_PAGE_NUM(RAD0_MEM_BLK_BASE_PG_NUM + + bna->port_num, RSS_TABLE_BASE_OFFSET), + bna->regs.page_addr); + + /* temporarily disable RSS, while hash value is written */ + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(0, base_addr + off); + + for (i = 0; i < BFI_RSS_HASH_KEY_LEN; i++) { + off = (unsigned long) + &rss_mem[0].hash_key[(BFI_RSS_HASH_KEY_LEN - 1) - i]; + writel(htonl(rxf->rss_cfg.toeplitz_hash_key[i]), + base_addr + off); + } + + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(rxf->rss_cfg.hash_type | rxf->rss_cfg.hash_mask, + base_addr + off); + } + + /* Configure RxF */ + writel(BNA_GET_PAGE_NUM( + LUT0_MEM_BLK_BASE_PG_NUM + (bna->port_num * 2), + RX_FNDB_RAM_BASE_OFFSET), + bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + rx_fndb_ram = (struct bna_rx_fndb_ram *)0; + + /* We always use RSS table 0 */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rss_prop; + writel(rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE, + base_addr + off); + + /* small large buffer enable/disable */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].size_routing_props; + writel((rxf->ctrl_flags & BNA_RXF_CF_SM_LG_RXQ) | 0x80, + base_addr + off); + + /* RIT offset, HDS forced offset, multicast RxQ Id */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rit_hds_mcastq; + writel((rxf->rit_segment->rit_offset << 16) | + (rxf->forced_offset << 8) | + (rxf->hds_cfg.hdr_type & BNA_HDS_FORCED) | rxf->mcast_rxq_id, + base_addr + off); + + /* + * default vlan tag, default function enable, strip vlan bytes, + * HDS type, header size + */ + + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].control_flags; + writel(((u32)rxf->default_vlan_tag << 16) | + (rxf->ctrl_flags & + (BNA_RXF_CF_DEFAULT_VLAN | + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE | + BNA_RXF_CF_VLAN_STRIP)) | + (rxf->hds_cfg.hdr_type & ~BNA_HDS_FORCED) | + rxf->hds_cfg.header_size, + base_addr + off); +} + +void +__rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna *bna = rxf->rx->bna; + int i; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (bna->port_num * 2), VLAN_RAM_BASE_OFFSET), + bna->regs.page_addr); + + if (status == BNA_STATUS_T_ENABLED) { + /* enable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(rxf->vlan_filter_table[i], + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } else { + /* disable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(0xffffffff, + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } +} + +static void +__rxf_rit_set(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + struct bna_rit_mem *rit_mem; + int i; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + FUNCTION_TO_RXQ_TRANSLATE); + + rit_mem = (struct bna_rit_mem *)0; + + writel(BNA_GET_PAGE_NUM(RXA0_MEM_BLK_BASE_PG_NUM + bna->port_num, + FUNCTION_TO_RXQ_TRANSLATE), + bna->regs.page_addr); + + for (i = 0; i < rxf->rit_segment->rit_size; i++) { + off = (unsigned long)&rit_mem[i + rxf->rit_segment->rit_offset]; + 
writel(rxf->rit_segment->rit[i].large_rxq_id << 6 |
+ rxf->rit_segment->rit[i].small_rxq_id,
+ base_addr + off);
+ }
+}
+
+static void
+__bna_rxf_stat_clr(struct bna_rxf *rxf)
+{
+ struct bfi_ll_stats_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0);
+ ll_req.stats_mask = 0;
+ ll_req.txf_id_mask[0] = 0;
+ ll_req.txf_id_mask[1] = 0;
+
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ bna_rxf_cb_stats_cleared, rxf);
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static void
+rxf_enable(struct bna_rxf *rxf)
+{
+ if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED)
+ bfa_fsm_send_event(rxf, RXF_E_STARTED);
+ else {
+ rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED;
+ __rxf_enable(rxf);
+ }
+}
+
+static void
+rxf_cb_enabled(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STARTED);
+}
+
+static void
+rxf_disable(struct bna_rxf *rxf)
+{
+ if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED)
+ bfa_fsm_send_event(rxf, RXF_E_STOPPED);
+ else
+ rxf->rxf_flags &= ~BNA_RXF_FL_RXF_ENABLED;
+ __rxf_disable(rxf);
+}
+
+static void
+rxf_cb_disabled(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STOPPED);
+}
+
+void
+rxf_cb_cam_fltr_mbox_cmd(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+
+ bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP);
+}
+
+static void
+bna_rxf_cb_stats_cleared(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STAT_CLEARED);
+}
+
+void
+rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd,
+ const struct bna_mac *mac_addr)
+{
+ struct bfi_ll_mac_addr_req req;
+
+ bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0);
+
+ req.rxf_id = rxf->rxf_id;
+ memcpy(&req.mac_addr, (void *)&mac_addr->addr, ETH_ALEN);
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req),
+ rxf_cb_cam_fltr_mbox_cmd, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static int
+rxf_process_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct bna_mac *mac = NULL;
+ struct list_head *qe;
+
+ /* Add multicast entries */
+ if (!list_empty(&rxf->mcast_pending_add_q)) {
+ bfa_q_deq(&rxf->mcast_pending_add_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_ADD_REQ, mac);
+ list_add_tail(&mac->qe, &rxf->mcast_active_q);
+ return 1;
+ }
+
+ /* Delete multicast entries previously added */
+ if (!list_empty(&rxf->mcast_pending_del_q)) {
+ bfa_q_deq(&rxf->mcast_pending_del_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+rxf_process_packet_filter_vlan(struct bna_rxf *rxf)
+{
+ /* Apply the VLAN filter */
+ if (rxf->rxf_flags & BNA_RXF_FL_VLAN_CONFIG_PENDING) {
+ rxf->rxf_flags &= ~BNA_RXF_FL_VLAN_CONFIG_PENDING;
+ if (!(rxf->rxmode_active & BNA_RXMODE_PROMISC) &&
+ !(rxf->rxmode_active & BNA_RXMODE_DEFAULT))
+ __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status);
+ }
+
+ /* Apply RSS configuration */
+ if (rxf->rxf_flags & BNA_RXF_FL_RSS_CONFIG_PENDING) {
+ rxf->rxf_flags &= ~BNA_RXF_FL_RSS_CONFIG_PENDING;
+ if (rxf->rss_status == BNA_STATUS_T_DISABLED) {
+ /* RSS is being disabled */
+ rxf->ctrl_flags &= ~BNA_RXF_CF_RSS_ENABLE;
+ __rxf_rit_set(rxf);
+ __rxf_config_set(rxf);
+ } else {
+ /* RSS is being enabled or reconfigured */
+ rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE;
+ __rxf_rit_set(rxf);
+ __rxf_config_set(rxf);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Processes pending ucast, mcast entry addition/deletion and issues mailbox
+ * command. Also processes pending filter configuration - promiscuous mode,
+ * default mode, allmulti mode and issues mailbox command or directly applies
+ * to h/w
+ */
+static int
+rxf_process_packet_filter(struct bna_rxf *rxf)
+{
+ /* Set the default MAC first */
+ if (rxf->ucast_pending_set > 0) {
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_SET_REQ,
+ rxf->ucast_active_mac);
+ rxf->ucast_pending_set--;
+ return 1;
+ }
+
+ if (rxf_process_packet_filter_ucast(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_mcast(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_promisc(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_default(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_allmulti(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_vlan(rxf))
+ return 1;
+
+ return 0;
+}
+
+static int
+rxf_clear_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct bna_mac *mac = NULL;
+ struct list_head *qe;
+
+ /* 3. delete pending mcast entries */
+ if (!list_empty(&rxf->mcast_pending_del_q)) {
+ bfa_q_deq(&rxf->mcast_pending_del_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac);
+ return 1;
+ }
+
+ /* 4. clear active mcast entries; move them to pending_add_q */
+ if (!list_empty(&rxf->mcast_active_q)) {
+ bfa_q_deq(&rxf->mcast_active_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ list_add_tail(&mac->qe, &rxf->mcast_pending_add_q);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * In the rxf stop path, processes pending ucast/mcast delete queue and issues
+ * the mailbox command. Moves the active ucast/mcast entries to pending add q,
+ * so that they are added to CAM again in the rxf start path. Moves the current
+ * filter settings - promiscuous, default, allmulti - to pending filter
+ * configuration
+ */
+static int
+rxf_clear_packet_filter(struct bna_rxf *rxf)
+{
+ if (rxf_clear_packet_filter_ucast(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_mcast(rxf))
+ return 1;
+
+ /* 5. clear active default MAC in the CAM */
+ if (rxf->ucast_pending_set > 0)
+ rxf->ucast_pending_set = 0;
+
+ if (rxf_clear_packet_filter_promisc(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_default(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_allmulti(rxf))
+ return 1;
+
+ return 0;
+}
+
+static void
+rxf_reset_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct list_head *qe;
+ struct bna_mac *mac;
+
+ /* 3. Move active mcast entries to pending_add_q */
+ while (!list_empty(&rxf->mcast_active_q)) {
+ bfa_q_deq(&rxf->mcast_active_q, &qe);
+ bfa_q_qe_init(qe);
+ list_add_tail(qe, &rxf->mcast_pending_add_q);
+ }
+
+ /* 4.
Throw away delete pending mcast entries */ + while (!list_empty(&rxf->mcast_pending_del_q)) { + bfa_q_deq(&rxf->mcast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } +} + +/** + * In the rxf fail path, throws away the ucast/mcast entries pending for + * deletion, moves all active ucast/mcast entries to pending queue so that + * they are added back to CAM in the rxf start path. Also moves the current + * filter configuration to pending filter configuration. + */ +static void +rxf_reset_packet_filter(struct bna_rxf *rxf) +{ + rxf_reset_packet_filter_ucast(rxf); + + rxf_reset_packet_filter_mcast(rxf); + + /* 5. Turn off ucast set flag */ + rxf->ucast_pending_set = 0; + + rxf_reset_packet_filter_promisc(rxf); + + rxf_reset_packet_filter_default(rxf); + + rxf_reset_packet_filter_allmulti(rxf); +} + +void +bna_rxf_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + struct list_head *qe; + struct bna_rxp *rxp; + + /* rxf_id is initialized during rx_mod init */ + rxf->rx = rx; + + INIT_LIST_HEAD(&rxf->ucast_pending_add_q); + INIT_LIST_HEAD(&rxf->ucast_pending_del_q); + rxf->ucast_pending_set = 0; + INIT_LIST_HEAD(&rxf->ucast_active_q); + rxf->ucast_active_mac = NULL; + + INIT_LIST_HEAD(&rxf->mcast_pending_add_q); + INIT_LIST_HEAD(&rxf->mcast_pending_del_q); + INIT_LIST_HEAD(&rxf->mcast_active_q); + + bfa_q_qe_init(&rxf->mbox_qe.qe); + + if (q_config->vlan_strip_status == BNA_STATUS_T_ENABLED) + rxf->ctrl_flags |= BNA_RXF_CF_VLAN_STRIP; + + rxf->rxf_oper_state = (q_config->paused) ? + BNA_RXF_OPER_STATE_PAUSED : BNA_RXF_OPER_STATE_RUNNING; + + bna_rxf_adv_init(rxf, rx, q_config); + + rxf->rit_segment = bna_rit_mod_seg_get(&rxf->rx->bna->rit_mod, + q_config->num_paths); + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + if (q_config->rxp_type == BNA_RXP_SINGLE) + rxf->mcast_rxq_id = rxp->rxq.single.only->rxq_id; + else + rxf->mcast_rxq_id = rxp->rxq.slr.large->rxq_id; + break; + } + + rxf->vlan_filter_status = BNA_STATUS_T_DISABLED; + memset(rxf->vlan_filter_table, 0, + (sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32))); + + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); +} + +void +bna_rxf_uninit(struct bna_rxf *rxf) +{ + struct bna_mac *mac; + + bna_rit_mod_seg_put(&rxf->rx->bna->rit_mod, rxf->rit_segment); + rxf->rit_segment = NULL; + + rxf->ucast_pending_set = 0; + + while (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } + + if (rxf->ucast_active_mac) { + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, + rxf->ucast_active_mac); + rxf->ucast_active_mac = NULL; + } + + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + rxf->rx = NULL; +} + +void +bna_rxf_start(struct bna_rxf *rxf) +{ + rxf->start_cbfn = bna_rx_cb_rxf_started; + rxf->start_cbarg = rxf->rx; + rxf->rxf_flags &= ~BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_START); +} + +void +bna_rxf_stop(struct bna_rxf *rxf) +{ + rxf->stop_cbfn = bna_rx_cb_rxf_stopped; + rxf->stop_cbarg = rxf->rx; + bfa_fsm_send_event(rxf, RXF_E_STOP); +} + +void +bna_rxf_fail(struct bna_rxf *rxf) +{ + rxf->rxf_flags |= BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_FAIL); +} + +int +bna_rxf_state_get(struct bna_rxf *rxf) +{ + return 
bfa_sm_to_state(rxf_sm_table, rxf->fsm); +} + +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->ucast_active_mac == NULL) { + rxf->ucast_active_mac = + bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (rxf->ucast_active_mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + } + + memcpy(rxf->ucast_active_mac->addr, ucmac, ETH_ALEN); + rxf->ucast_pending_set++; + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + return BNA_CB_MCAST_LIST_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->mcast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mclist, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head list_head; + struct list_head *qe; + u8 *mcaddr; + struct bna_mac *mac; + struct bna_mac *mac1; + int skip; + int delete; + int need_hw_config = 0; + int i; + + /* Allocate nodes */ + INIT_LIST_HEAD(&list_head); + for (i = 0, mcaddr = mclist; i < count; i++) { + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + goto err_return; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, mcaddr, ETH_ALEN); + list_add_tail(&mac->qe, &list_head); + + mcaddr += ETH_ALEN; + } + + /* Schedule for addition */ + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + + skip 
= 0; + + /* Skip if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + /* Skip if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + need_hw_config = 1; + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Delete the entries that are in the pending_add_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + else + list_add_tail(&mac->qe, &list_head); + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Schedule entries for deletion that are in the active_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) { + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } else { + list_add_tail(&mac->qe, &list_head); + } + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_active_q); + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + return BNA_CB_MCAST_LIST_FULL; +} + +void +bna_rx_vlan_add(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] |= bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +void +bna_rx_vlan_del(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] &= ~bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +/** + * RX + */ +#define RXQ_RCB_INIT(q, rxp, qdepth, bna, _id, unmapq_mem) do { \ + struct bna_doorbell_qset *_qset; \ + unsigned long off; \ + (q)->rcb->producer_index = (q)->rcb->consumer_index = 0; \ + (q)->rcb->q_depth = (qdepth); \ + (q)->rcb->unmap_q = unmapq_mem; \ + (q)->rcb->rxq 
= (q); \ + (q)->rcb->cq = &(rxp)->cq; \ + (q)->rcb->bnad = (bna)->bnad; \ + _qset = (struct bna_doorbell_qset *)0; \ + off = (unsigned long)&_qset[(q)->rxq_id].rxq[0]; \ + (q)->rcb->q_dbell = off + \ + BNA_GET_DOORBELL_BASE_ADDR((bna)->pcidev.pci_bar_kva); \ + (q)->rcb->id = _id; \ +} while (0) + +#define BNA_GET_RXQS(qcfg) (((qcfg)->rxp_type == BNA_RXP_SINGLE) ? \ + (qcfg)->num_paths : ((qcfg)->num_paths * 2)) + +#define SIZE_TO_PAGES(size) (((size) >> PAGE_SHIFT) + ((((size) &\ + (PAGE_SIZE - 1)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT)) + +#define call_rx_stop_callback(rx, status) \ + if ((rx)->stop_cbfn) { \ + (*(rx)->stop_cbfn)((rx)->stop_cbarg, rx, (status)); \ + (rx)->stop_cbfn = NULL; \ + (rx)->stop_cbarg = NULL; \ + } + +/* + * Since rx_enable is synchronous callback, there is no start_cbfn required. + * Instead, we'll call bnad_rx_post(rxp) so that bnad can post the buffers + * for each rxpath. + */ + +#define call_rx_disable_cbfn(rx, status) \ + if ((rx)->disable_cbfn) { \ + (*(rx)->disable_cbfn)((rx)->disable_cbarg, \ + status); \ + (rx)->disable_cbfn = NULL; \ + (rx)->disable_cbarg = NULL; \ + } \ + +#define rxqs_reqd(type, num_rxqs) \ + (((type) == BNA_RXP_SINGLE) ? (num_rxqs) : ((num_rxqs) * 2)) + +#define rx_ib_fail(rx) \ +do { \ + struct bna_rxp *rxp; \ + struct list_head *qe; \ + list_for_each(qe, &(rx)->rxp_q) { \ + rxp = (struct bna_rxp *)qe; \ + bna_ib_fail(rxp->cq.ib); \ + } \ +} while (0) + +static void __bna_multi_rxq_stop(struct bna_rxp *, u32 *); +static void __bna_rxq_start(struct bna_rxq *rxq); +static void __bna_cq_start(struct bna_cq *cq); +static void bna_rit_create(struct bna_rx *rx); +static void bna_rx_cb_multi_rxq_stopped(void *arg, int status); +static void bna_rx_cb_rxq_stopped_all(void *arg); + +bfa_fsm_state_decl(bna_rx, stopped, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_start_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, started, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_stop_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxq_stop_wait, + struct bna_rx, enum bna_rx_event); + +static struct bfa_sm_table rx_sm_table[] = { + {BFA_SM(bna_rx_sm_stopped), BNA_RX_STOPPED}, + {BFA_SM(bna_rx_sm_rxf_start_wait), BNA_RX_RXF_START_WAIT}, + {BFA_SM(bna_rx_sm_started), BNA_RX_STARTED}, + {BFA_SM(bna_rx_sm_rxf_stop_wait), BNA_RX_RXF_STOP_WAIT}, + {BFA_SM(bna_rx_sm_rxq_stop_wait), BNA_RX_RXQ_STOP_WAIT}, +}; + +static void bna_rx_sm_stopped_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + rx->rx_cleanup_cbfn(rx->bna->bnad, rxp->cq.ccb); + } + + call_rx_stop_callback(rx, BNA_CB_SUCCESS); +} + +static void bna_rx_sm_stopped(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_START: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_start_wait); + break; + case RX_E_STOP: + call_rx_stop_callback(rx, BNA_CB_SUCCESS); + break; + case RX_E_FAIL: + /* no-op */ + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +static void bna_rx_sm_rxf_start_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + struct bna_rxq *q0 = NULL, *q1 = NULL; + + /* Setup the RIT */ + bna_rit_create(rx); + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_start(rxp->cq.ib); + GET_RXQS(rxp, q0, q1); + q0->buffer_size = bna_port_mtu_get(&rx->bna->port); + __bna_rxq_start(q0); + 
rx->rx_post_cbfn(rx->bna->bnad, q0->rcb); + if (q1) { + __bna_rxq_start(q1); + rx->rx_post_cbfn(rx->bna->bnad, q1->rcb); + } + __bna_cq_start(&rxp->cq); + } + + bna_rxf_start(&rx->rxf); +} + +static void bna_rx_sm_rxf_start_wait(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_STOP: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_RXF_STARTED: + bfa_fsm_set_state(rx, bna_rx_sm_started); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_started_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + /* Start IB */ + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_ack(&rxp->cq.ib->door_bell, 0); + } + + bna_llport_admin_up(&rx->bna->port.llport); +} + +void +bna_rx_sm_started(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_FAIL: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_STOP: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_rxf_stop_wait_entry(struct bna_rx *rx) +{ + bna_rxf_stop(&rx->rxf); +} + +void +bna_rx_sm_rxf_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_RXF_STOPPED: + bfa_fsm_set_state(rx, bna_rx_sm_rxq_stop_wait); + break; + case RX_E_RXF_STARTED: + /** + * RxF was in the process of starting up when + * RXF_E_STOP was issued. Ignore this event + */ + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +void +bna_rx_sm_rxq_stop_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp = NULL; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct list_head *qe; + u32 rxq_mask[2] = {0, 0}; + + /* Only one call to multi-rxq-stop for all RXPs in this RX */ + bfa_wc_up(&rx->rxq_stop_wc); + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + GET_RXQS(rxp, q0, q1); + if (q0->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q0->rxq_id); + else + rxq_mask[1] |= ((u32)1 << (q0->rxq_id - 32)); + if (q1) { + if (q1->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q1->rxq_id); + else + rxq_mask[1] |= ((u32) + 1 << (q1->rxq_id - 32)); + } + } + + __bna_multi_rxq_stop(rxp, rxq_mask); +} + +void +bna_rx_sm_rxq_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + struct bna_rxp *rxp = NULL; + struct list_head *qe; + + switch (event) { + case RX_E_RXQ_STOPPED: + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + bna_ib_stop(rxp->cq.ib); + } + /* Fall through */ + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +__bna_multi_rxq_stop(struct bna_rxp *rxp, u32 * rxq_id_mask) +{ + struct bfi_ll_q_stop_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RXQ_STOP_REQ, 0); + ll_req.q_id_mask[0] = htonl(rxq_id_mask[0]); + ll_req.q_id_mask[1] = htonl(rxq_id_mask[1]); + bna_mbox_qe_fill(&rxp->mbox_qe, &ll_req, sizeof(ll_req), + bna_rx_cb_multi_rxq_stopped, rxp); + bna_mbox_send(rxp->rx->bna, &rxp->mbox_qe); +} + +void +__bna_rxq_start(struct bna_rxq *rxq) +{ + struct bna_rxtx_q_mem 
*q_mem; + struct bna_rxq_mem rxq_cfg, *rxq_mem; + struct bna_dma_addr cur_q_addr; + /* struct bna_doorbell_qset *qset; */ + struct bna_qpt *qpt; + u32 pg_num; + struct bna *bna = rxq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &rxq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + rxq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + rxq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + rxq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + rxq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + rxq_cfg.pg_cnt_n_prd_ptr = ((u32)qpt->page_count << 16) | 0x0; + rxq_cfg.entry_n_pg_size = ((u32)(BFI_RXQ_WI_SIZE >> 2) << 16) | + (qpt->page_size >> 2); + rxq_cfg.sg_n_cq_n_cns_ptr = + ((u32)(rxq->rxp->cq.cq_id & 0xff) << 16) | 0x0; + rxq_cfg.buf_sz_n_q_state = ((u32)rxq->buffer_size << 16) | + BNA_Q_IDLE_STATE; + rxq_cfg.next_qid = 0x0 | (0x3 << 8); + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, bna->regs.page_addr); + + /* Write to h/w */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + + q_mem = (struct bna_rxtx_q_mem *)0; + rxq_mem = &q_mem[rxq->rxq_id].rxq; + + off = (unsigned long)&rxq_mem->pg_tbl_addr_lo; + writel(htonl(rxq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_tbl_addr_hi; + writel(htonl(rxq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_lo; + writel(htonl(rxq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_hi; + writel(htonl(rxq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_cnt_n_prd_ptr; + writel(rxq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->entry_n_pg_size; + writel(rxq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&rxq_mem->sg_n_cq_n_cns_ptr; + writel(rxq_cfg.sg_n_cq_n_cns_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->buf_sz_n_q_state; + writel(rxq_cfg.buf_sz_n_q_state, base_addr + off); + + off = (unsigned long)&rxq_mem->next_qid; + writel(rxq_cfg.next_qid, base_addr + off); + + rxq->rcb->producer_index = 0; + rxq->rcb->consumer_index = 0; +} + +void +__bna_cq_start(struct bna_cq *cq) +{ + struct bna_cq_mem cq_cfg, *cq_mem; + const struct bna_qpt *qpt; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + struct bna *bna = cq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &cq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + /* + * Fill out structure, to be subsequently written + * to hardware + */ + cq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + cq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + cq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + cq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + cq_cfg.pg_cnt_n_prd_ptr = (qpt->page_count << 16) | 0x0; + cq_cfg.entry_n_pg_size = + ((u32)(BFI_CQ_WI_SIZE >> 2) << 16) | (qpt->page_size >> 2); + cq_cfg.int_blk_n_cns_ptr = ((((u32)cq->ib_seg_offset) << 24) | + ((u32)(cq->ib->ib_id & 0xff) << 16) | 0x0); + cq_cfg.q_state = BNA_Q_IDLE_STATE; + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_CQ_RAM_BASE_OFFSET); + + writel(pg_num, bna->regs.page_addr); + + /* H/W write */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_CQ_RAM_BASE_OFFSET); + + cq_mem = (struct bna_cq_mem *)0; + + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_lo; + writel(htonl(cq_cfg.pg_tbl_addr_lo), base_addr + off); 
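+ /* + * The (struct bna_cq_mem *)0 base above is the usual offsetof + * trick: indexing a NULL base turns each field reference into the + * byte offset of that register inside the CQ RAM window. Each + * write below is therefore roughly equivalent to: + * + * off = cq->cq_id * sizeof(struct bna_cq_mem) + + * offsetof(struct bna_cq_mem, pg_tbl_addr_hi); + * writel(htonl(cq_cfg.pg_tbl_addr_hi), base_addr + off); + */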
+ + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_hi; + writel(htonl(cq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_lo; + writel(htonl(cq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_hi; + writel(htonl(cq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].pg_cnt_n_prd_ptr; + writel(cq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].entry_n_pg_size; + writel(cq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].int_blk_n_cns_ptr; + writel(cq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].q_state; + writel(cq_cfg.q_state, base_addr + off); + + cq->ccb->producer_index = 0; + *(cq->ccb->hw_producer_index) = 0; +} + +void +bna_rit_create(struct bna_rx *rx) +{ + struct list_head *qe_rxp; + struct bna *bna; + struct bna_rxp *rxp; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + int offset; + + bna = rx->bna; + + offset = 0; + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + GET_RXQS(rxp, q0, q1); + rx->rxf.rit_segment->rit[offset].large_rxq_id = q0->rxq_id; + rx->rxf.rit_segment->rit[offset].small_rxq_id = + (q1 ? q1->rxq_id : 0); + offset++; + } +} + +int +_rx_can_satisfy(struct bna_rx_mod *rx_mod, + struct bna_rx_config *rx_cfg) +{ + if ((rx_mod->rx_free_count == 0) || + (rx_mod->rxp_free_count == 0) || + (rx_mod->rxq_free_count == 0)) + return 0; + + if (rx_cfg->rxp_type == BNA_RXP_SINGLE) { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < rx_cfg->num_paths)) + return 0; + } else { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < (2 * rx_cfg->num_paths))) + return 0; + } + + if (!bna_rit_mod_can_satisfy(&rx_mod->bna->rit_mod, rx_cfg->num_paths)) + return 0; + + return 1; +} + +struct bna_rxq * +_get_free_rxq(struct bna_rx_mod *rx_mod) +{ + struct bna_rxq *rxq = NULL; + struct list_head *qe = NULL; + + bfa_q_deq(&rx_mod->rxq_free_q, &qe); + if (qe) { + rx_mod->rxq_free_count--; + rxq = (struct bna_rxq *)qe; + } + return rxq; +} + +void +_put_free_rxq(struct bna_rx_mod *rx_mod, struct bna_rxq *rxq) +{ + bfa_q_qe_init(&rxq->qe); + list_add_tail(&rxq->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; +} + +struct bna_rxp * +_get_free_rxp(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rxp *rxp = NULL; + + bfa_q_deq(&rx_mod->rxp_free_q, &qe); + if (qe) { + rx_mod->rxp_free_count--; + + rxp = (struct bna_rxp *)qe; + } + + return rxp; +} + +void +_put_free_rxp(struct bna_rx_mod *rx_mod, struct bna_rxp *rxp) +{ + bfa_q_qe_init(&rxp->qe); + list_add_tail(&rxp->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; +} + +struct bna_rx * +_get_free_rx(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rx *rx = NULL; + + bfa_q_deq(&rx_mod->rx_free_q, &qe); + if (qe) { + rx_mod->rx_free_count--; + + rx = (struct bna_rx *)qe; + bfa_q_qe_init(qe); + list_add_tail(&rx->qe, &rx_mod->rx_active_q); + } + + return rx; +} + +void +_put_free_rx(struct bna_rx_mod *rx_mod, struct bna_rx *rx) +{ + bfa_q_qe_init(&rx->qe); + list_add_tail(&rx->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; +} + +void +_rx_init(struct bna_rx *rx, struct bna *bna) +{ + rx->bna = bna; + rx->rx_flags = 0; + + INIT_LIST_HEAD(&rx->rxp_q); + + rx->rxq_stop_wc.wc_resume = bna_rx_cb_rxq_stopped_all; + rx->rxq_stop_wc.wc_cbarg = rx; + 
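+ /* + * rxq_stop_wc is a wait counter: bfa_wc_up() adds one expected + * ack, bfa_wc_down() retires one, and wc_resume() runs when the + * count drops back to zero. In sketch form, for the RxQ stop + * path: + * + * bfa_wc_up(&rx->rxq_stop_wc); - rxq_stop_wait entry + * bfa_wc_down(&rx->rxq_stop_wc); - each f/w stop ack + * zero -> bna_rx_cb_rxq_stopped_all() -> RX_E_RXQ_STOPPED + */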
rx->rxq_stop_wc.wc_count = 0; + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +_rxp_add_rxqs(struct bna_rxp *rxp, + struct bna_rxq *q0, + struct bna_rxq *q1) +{ + switch (rxp->type) { + case BNA_RXP_SINGLE: + rxp->rxq.single.only = q0; + rxp->rxq.single.reserved = NULL; + break; + case BNA_RXP_SLR: + rxp->rxq.slr.large = q0; + rxp->rxq.slr.small = q1; + break; + case BNA_RXP_HDS: + rxp->rxq.hds.data = q0; + rxp->rxq.hds.hdr = q1; + break; + default: + break; + } +} + +void +_rxq_qpt_init(struct bna_rxq *rxq, + struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxq->qpt.kv_qpt_ptr = qpt_mem->kva; + rxq->qpt.page_count = page_count; + rxq->qpt.page_size = page_size; + + rxq->rcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxq->qpt.page_count; i++) { + rxq->rcb->sw_qpt[i] = page_mem[i].kva; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rxp_cqpt_setup(struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxp->cq.qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxp->cq.qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxp->cq.qpt.kv_qpt_ptr = qpt_mem->kva; + rxp->cq.qpt.page_count = page_count; + rxp->cq.qpt.page_size = page_size; + + rxp->cq.ccb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxp->cq.qpt.page_count; i++) { + rxp->cq.ccb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rx_add_rxp(struct bna_rx *rx, struct bna_rxp *rxp) +{ + list_add_tail(&rxp->qe, &rx->rxp_q); +} + +void +_init_rxmod_queues(struct bna_rx_mod *rx_mod) +{ + INIT_LIST_HEAD(&rx_mod->rx_free_q); + INIT_LIST_HEAD(&rx_mod->rxq_free_q); + INIT_LIST_HEAD(&rx_mod->rxp_free_q); + INIT_LIST_HEAD(&rx_mod->rx_active_q); + + rx_mod->rx_free_count = 0; + rx_mod->rxq_free_count = 0; + rx_mod->rxp_free_count = 0; +} + +void +_rx_ctor(struct bna_rx *rx, int id) +{ + bfa_q_qe_init(&rx->qe); + INIT_LIST_HEAD(&rx->rxp_q); + rx->bna = NULL; + + rx->rxf.rxf_id = id; + + /* FIXME: mbox_qe ctor()?? 
*/ + bfa_q_qe_init(&rx->mbox_qe.qe); + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +bna_rx_cb_multi_rxq_stopped(void *arg, int status) +{ + struct bna_rxp *rxp = (struct bna_rxp *)arg; + + bfa_wc_down(&rxp->rx->rxq_stop_wc); +} + +void +bna_rx_cb_rxq_stopped_all(void *arg) +{ + struct bna_rx *rx = (struct bna_rx *)arg; + + bfa_fsm_send_event(rx, RX_E_RXQ_STOPPED); +} + +void +bna_rx_mod_cb_rx_stopped(void *arg, struct bna_rx *rx, + enum bna_cb_status status) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + bfa_wc_down(&rx_mod->rx_stop_wc); +} + +void +bna_rx_mod_cb_rx_stopped_all(void *arg) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + if (rx_mod->stop_cbfn) + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; +} + +void +bna_rx_start(struct bna_rx *rx) +{ + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + if (rx->rx_flags & BNA_RX_F_ENABLE) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_stop(struct bna_rx *rx) +{ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + if (rx->fsm == (bfa_fsm_t) bna_rx_sm_stopped) + bna_rx_mod_cb_rx_stopped(&rx->bna->rx_mod, rx, BNA_CB_SUCCESS); + else { + rx->stop_cbfn = bna_rx_mod_cb_rx_stopped; + rx->stop_cbarg = &rx->bna->rx_mod; + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +void +bna_rx_fail(struct bna_rx *rx) +{ + /* Indicate port is not enabled, and failed */ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + rx->rx_flags |= BNA_RX_F_PORT_FAILED; + bfa_fsm_send_event(rx, RX_E_FAIL); +} + +void +bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STARTED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] |= ((u32)1 << rx->rxf.rxf_id); + else + rx->bna->rx_mod.rxf_bmap[1] |= ((u32) + 1 << (rx->rxf.rxf_id - 32)); +} + +void +bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STOPPED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] &= ~((u32)1 << rx->rxf.rxf_id); + else + rx->bna->rx_mod.rxf_bmap[1] &= ~((u32) + 1 << (rx->rxf.rxf_id - 32)); +} + +void +bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags |= BNA_RX_MOD_F_PORT_STARTED; + if (type == BNA_RX_T_LOOPBACK) + rx_mod->flags |= BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_start(rx); + } +} + +void +bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + rx_mod->stop_cbfn = bna_port_cb_rx_stopped; + + /** + * Before calling bna_rx_stop(), increment rx_stop_wc as many times + * as we are going to call bna_rx_stop + */ + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bfa_wc_up(&rx_mod->rx_stop_wc); + } + + if (rx_mod->rx_stop_wc.wc_count == 0) { + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_stop(rx); + } +} + +void +bna_rx_mod_fail(struct bna_rx_mod *rx_mod) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + bna_rx_fail(rx);
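+ /* + * Unlike RX_E_STOP, RX_E_FAIL must not touch the h/w: the RX + * FSM jumps straight to the stopped state, e.g. from started: + * + * bna_llport_admin_down(&rx->bna->port.llport); + * bfa_fsm_set_state(rx, bna_rx_sm_stopped); + * rx_ib_fail(rx); - no mailbox traffic + * bna_rxf_fail(&rx->rxf); + * + * so no stop acks are awaited on the failure path. + */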
+ } +} + +void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int index; + struct bna_rx *rx_ptr; + struct bna_rxp *rxp_ptr; + struct bna_rxq *rxq_ptr; + + rx_mod->bna = bna; + rx_mod->flags = 0; + + rx_mod->rx = (struct bna_rx *) + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxp = (struct bna_rxp *) + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxq = (struct bna_rxq *) + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + /* Initialize the queues */ + _init_rxmod_queues(rx_mod); + + /* Build RX queues */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rx_ptr = &rx_mod->rx[index]; + _rx_ctor(rx_ptr, index); + list_add_tail(&rx_ptr->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; + } + + /* build RX-path queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxp_ptr = &rx_mod->rxp[index]; + rxp_ptr->cq.cq_id = index; + bfa_q_qe_init(&rxp_ptr->qe); + list_add_tail(&rxp_ptr->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; + } + + /* build RXQ queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxq_ptr = &rx_mod->rxq[index]; + rxq_ptr->rxq_id = index; + + bfa_q_qe_init(&rxq_ptr->qe); + list_add_tail(&rxq_ptr->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; + } + + rx_mod->rx_stop_wc.wc_resume = bna_rx_mod_cb_rx_stopped_all; + rx_mod->rx_stop_wc.wc_cbarg = rx_mod; + rx_mod->rx_stop_wc.wc_count = 0; +} + +void +bna_rx_mod_uninit(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &rx_mod->rx_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxp_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxq_free_q) + i++; + + rx_mod->bna = NULL; +} + +int +bna_rx_state_get(struct bna_rx *rx) +{ + return bfa_sm_to_state(rx_sm_table, rx->fsm); +} + +void +bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info) +{ + u32 cq_size, hq_size, dq_size; + u32 cpage_count, hpage_count, dpage_count; + struct bna_mem_info *mem_info; + u32 cq_depth; + u32 hq_depth; + u32 dq_depth; + + dq_depth = q_cfg->q_depth; + hq_depth = ((q_cfg->rxp_type == BNA_RXP_SINGLE) ? 
0 : q_cfg->q_depth); + cq_depth = dq_depth + hq_depth; + + BNA_TO_POWER_OF_2_HIGH(cq_depth); + cq_size = cq_depth * BFI_CQ_WI_SIZE; + cq_size = ALIGN(cq_size, PAGE_SIZE); + cpage_count = SIZE_TO_PAGES(cq_size); + + BNA_TO_POWER_OF_2_HIGH(dq_depth); + dq_size = dq_depth * BFI_RXQ_WI_SIZE; + dq_size = ALIGN(dq_size, PAGE_SIZE); + dpage_count = SIZE_TO_PAGES(dq_size); + + if (BNA_RXP_SINGLE != q_cfg->rxp_type) { + BNA_TO_POWER_OF_2_HIGH(hq_depth); + hq_size = hq_depth * BFI_RXQ_WI_SIZE; + hq_size = ALIGN(hq_size, PAGE_SIZE); + hpage_count = SIZE_TO_PAGES(hq_size); + } else { + hpage_count = 0; + } + + /* CCB structures */ + res_info[BNA_RX_RES_MEM_T_CCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_ccb); + mem_info->num = q_cfg->num_paths; + + /* RCB structures */ + res_info[BNA_RX_RES_MEM_T_RCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_rcb); + mem_info->num = BNA_GET_RXQS(q_cfg); + + /* Completion QPT */ + res_info[BNA_RX_RES_MEM_T_CQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = cpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Completion s/w QPT */ + res_info[BNA_RX_RES_MEM_T_CSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = cpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Completion QPT pages */ + res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = cpage_count * q_cfg->num_paths; + + /* Data QPTs */ + res_info[BNA_RX_RES_MEM_T_DQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = dpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Data s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_DSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = dpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Data QPT pages */ + res_info[BNA_RX_RES_MEM_T_DPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = dpage_count * q_cfg->num_paths; + + /* Hdr QPTs */ + res_info[BNA_RX_RES_MEM_T_HQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = hpage_count * sizeof(struct bna_dma_addr); + mem_info->num = (hpage_count ? q_cfg->num_paths : 0); + + /* Hdr s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_HSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = hpage_count * sizeof(void *); + mem_info->num = (hpage_count ? 
q_cfg->num_paths : 0); + + /* Hdr QPT pages */ + res_info[BNA_RX_RES_MEM_T_HPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = (hpage_count ? PAGE_SIZE : 0); + mem_info->num = (hpage_count ? (hpage_count * q_cfg->num_paths) : 0); + + /* RX Interrupts */ + res_info[BNA_RX_RES_T_INTR].res_type = BNA_RES_T_INTR; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.intr_type = BNA_INTR_T_MSIX; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.num = q_cfg->num_paths; +} + +struct bna_rx * +bna_rx_create(struct bna *bna, struct bnad *bnad, + struct bna_rx_config *rx_cfg, + struct bna_rx_event_cbfn *rx_cbfn, + struct bna_res_info *res_info, + void *priv) +{ + struct bna_rx_mod *rx_mod = &bna->rx_mod; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct bna_rxq *q0; + struct bna_rxq *q1; + struct bna_intr_info *intr_info; + u32 page_count; + struct bna_mem_descr *ccb_mem; + struct bna_mem_descr *rcb_mem; + struct bna_mem_descr *unmapq_mem; + struct bna_mem_descr *cqpt_mem; + struct bna_mem_descr *cswqpt_mem; + struct bna_mem_descr *cpage_mem; + struct bna_mem_descr *hqpt_mem; /* Header/Small Q qpt */ + struct bna_mem_descr *dqpt_mem; /* Data/Large Q qpt */ + struct bna_mem_descr *hsqpt_mem; /* s/w qpt for hdr */ + struct bna_mem_descr *dsqpt_mem; /* s/w qpt for data */ + struct bna_mem_descr *hpage_mem; /* hdr page mem */ + struct bna_mem_descr *dpage_mem; /* data page mem */ + int i, cpage_idx = 0, dpage_idx = 0, hpage_idx = 0, ret; + int dpage_count, hpage_count, rcb_idx; + struct bna_ib_config ibcfg; + /* Fail if we don't have enough RXPs, RXQs */ + if (!_rx_can_satisfy(rx_mod, rx_cfg)) + return NULL; + + /* Initialize resource pointers */ + intr_info = &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + ccb_mem = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info.mdl[0]; + rcb_mem = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info.mdl[0]; + unmapq_mem = &res_info[BNA_RX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[0]; + cqpt_mem = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info.mdl[0]; + cswqpt_mem = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info.mdl[0]; + cpage_mem = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.mdl[0]; + hqpt_mem = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info.mdl[0]; + dqpt_mem = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info.mdl[0]; + hsqpt_mem = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info.mdl[0]; + dsqpt_mem = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info.mdl[0]; + hpage_mem = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.mdl[0]; + dpage_mem = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.mdl[0]; + + /* Compute q depth & page count */ + page_count = res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + dpage_count = res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + hpage_count = res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + /* Get RX pointer */ + rx = _get_free_rx(rx_mod); + _rx_init(rx, bna); + rx->priv = priv; + rx->type = rx_cfg->rx_type; + + rx->rcb_setup_cbfn = rx_cbfn->rcb_setup_cbfn; + rx->rcb_destroy_cbfn = rx_cbfn->rcb_destroy_cbfn; + rx->ccb_setup_cbfn = rx_cbfn->ccb_setup_cbfn; + rx->ccb_destroy_cbfn = rx_cbfn->ccb_destroy_cbfn; + /* Following callbacks are mandatory */ + rx->rx_cleanup_cbfn = rx_cbfn->rx_cleanup_cbfn; + rx->rx_post_cbfn = rx_cbfn->rx_post_cbfn; + + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_STARTED) { + switch (rx->type) { + case BNA_RX_T_REGULAR: + if 
(!(rx->bna->rx_mod.flags & + BNA_RX_MOD_F_PORT_LOOPBACK)) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + case BNA_RX_T_LOOPBACK: + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_LOOPBACK) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + } + } + + for (i = 0, rcb_idx = 0; i < rx_cfg->num_paths; i++) { + rxp = _get_free_rxp(rx_mod); + rxp->type = rx_cfg->rxp_type; + rxp->rx = rx; + rxp->cq.rx = rx; + + /* Get required RXQs, and queue them to rx-path */ + q0 = _get_free_rxq(rx_mod); + if (BNA_RXP_SINGLE == rx_cfg->rxp_type) + q1 = NULL; + else + q1 = _get_free_rxq(rx_mod); + + /* Initialize IB */ + if (1 == intr_info->num) { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[0].vector); + rxp->vector = intr_info->idl[0].vector; + } else { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[i].vector); + + /* Map the MSI-x vector used for this RXP */ + rxp->vector = intr_info->idl[i].vector; + } + + rxp->cq.ib_seg_offset = bna_ib_reserve_idx(rxp->cq.ib); + + ibcfg.coalescing_timeo = BFI_RX_COALESCING_TIMEO; + ibcfg.interpkt_count = BFI_RX_INTERPKT_COUNT; + ibcfg.interpkt_timeo = BFI_RX_INTERPKT_TIMEO; + ibcfg.ctrl_flags = BFI_IB_CF_INT_ENABLE; + + ret = bna_ib_config(rxp->cq.ib, &ibcfg); + + /* Link rxqs to rxp */ + _rxp_add_rxqs(rxp, q0, q1); + + /* Link rxp to rx */ + _rx_add_rxp(rx, rxp); + + q0->rx = rx; + q0->rxp = rxp; + + /* Initialize RCB for the large / data q */ + q0->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q0, rxp, rx_cfg->q_depth, bna, 0, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q0)->rx_packets = (q0)->rx_bytes = 0; + (q0)->rx_packets_with_error = (q0)->rxbuf_alloc_failed = 0; + + /* Initialize RXQs */ + _rxq_qpt_init(q0, rxp, dpage_count, PAGE_SIZE, + &dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[dpage_idx]); + q0->rcb->page_idx = dpage_idx; + q0->rcb->page_count = dpage_count; + dpage_idx += dpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q0->rcb); + + if (q1) { + q1->rx = rx; + q1->rxp = rxp; + + q1->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q1, rxp, rx_cfg->q_depth, bna, 1, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q1)->buffer_size = (rx_cfg)->small_buff_size; + (q1)->rx_packets = (q1)->rx_bytes = 0; + (q1)->rx_packets_with_error = + (q1)->rxbuf_alloc_failed = 0; + + _rxq_qpt_init(q1, rxp, hpage_count, PAGE_SIZE, + &hqpt_mem[i], &hsqpt_mem[i], + &hpage_mem[hpage_idx]); + q1->rcb->page_idx = hpage_idx; + q1->rcb->page_count = hpage_count; + hpage_idx += hpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q1->rcb); + } + /* Setup RXP::CQ */ + rxp->cq.ccb = (struct bna_ccb *) ccb_mem[i].kva; + _rxp_cqpt_setup(rxp, page_count, PAGE_SIZE, + &cqpt_mem[i], &cswqpt_mem[i], &cpage_mem[cpage_idx]); + rxp->cq.ccb->page_idx = cpage_idx; + rxp->cq.ccb->page_count = page_count; + cpage_idx += page_count; + + rxp->cq.ccb->pkt_rate.small_pkt_cnt = 0; + rxp->cq.ccb->pkt_rate.large_pkt_cnt = 0; + + rxp->cq.ccb->producer_index = 0; + rxp->cq.ccb->q_depth = rx_cfg->q_depth + + ((rx_cfg->rxp_type == BNA_RXP_SINGLE) ? 
+ 0 : rx_cfg->q_depth); + rxp->cq.ccb->i_dbell = &rxp->cq.ib->door_bell; + rxp->cq.ccb->rcb[0] = q0->rcb; + if (q1) + rxp->cq.ccb->rcb[1] = q1->rcb; + rxp->cq.ccb->cq = &rxp->cq; + rxp->cq.ccb->bnad = bna->bnad; + rxp->cq.ccb->hw_producer_index = + ((volatile u32 *)rxp->cq.ib->ib_seg_host_addr_kva + + (rxp->cq.ib_seg_offset * BFI_IBIDX_SIZE)); + *(rxp->cq.ccb->hw_producer_index) = 0; + rxp->cq.ccb->intr_type = intr_info->intr_type; + rxp->cq.ccb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + rxp->cq.ccb->rx_coalescing_timeo = + rxp->cq.ib->ib_config.coalescing_timeo; + rxp->cq.ccb->id = i; + + /* Call bnad to complete CCB setup */ + if (rx->ccb_setup_cbfn) + rx->ccb_setup_cbfn(bnad, rxp->cq.ccb); + + } /* for each rx-path */ + + bna_rxf_init(&rx->rxf, rx, rx_cfg); + + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + + return rx; +} + +void +bna_rx_destroy(struct bna_rx *rx) +{ + struct bna_rx_mod *rx_mod = &rx->bna->rx_mod; + struct bna_ib_mod *ib_mod = &rx->bna->ib_mod; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct bna_rxp *rxp; + struct list_head *qe; + + bna_rxf_uninit(&rx->rxf); + + while (!list_empty(&rx->rxp_q)) { + bfa_q_deq(&rx->rxp_q, &rxp); + GET_RXQS(rxp, q0, q1); + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q0->rcb); + q0->rcb = NULL; + q0->rxp = NULL; + q0->rx = NULL; + _put_free_rxq(rx_mod, q0); + if (q1) { + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q1->rcb); + q1->rcb = NULL; + q1->rxp = NULL; + q1->rx = NULL; + _put_free_rxq(rx_mod, q1); + } + rxp->rxq.slr.large = NULL; + rxp->rxq.slr.small = NULL; + if (rxp->cq.ib) { + if (rxp->cq.ib_seg_offset != 0xff) + bna_ib_release_idx(rxp->cq.ib, + rxp->cq.ib_seg_offset); + bna_ib_put(ib_mod, rxp->cq.ib); + rxp->cq.ib = NULL; + } + /* Callback to bnad for destroying CCB */ + if (rx->ccb_destroy_cbfn) + rx->ccb_destroy_cbfn(rx->bna->bnad, rxp->cq.ccb); + rxp->cq.ccb = NULL; + rxp->rx = NULL; + _put_free_rxp(rx_mod, rxp); + } + + list_for_each(qe, &rx_mod->rx_active_q) { + if (qe == &rx->qe) { + list_del(&rx->qe); + bfa_q_qe_init(&rx->qe); + break; + } + } + + rx->bna = NULL; + rx->priv = NULL; + _put_free_rx(rx_mod, rx); +} + +void +bna_rx_enable(struct bna_rx *rx) +{ + if (rx->fsm != (bfa_sm_t)bna_rx_sm_stopped) + return; + + rx->rx_flags |= BNA_RX_F_ENABLE; + if (rx->rx_flags & BNA_RX_F_PORT_ENABLED) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_disable(struct bna_rx *rx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + /* h/w should not be accessed. 
Treat it as stopped */ + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + } else { + rx->stop_cbfn = cbfn; + rx->stop_cbarg = rx->bna->bnad; + + rx->rx_flags &= ~BNA_RX_F_ENABLE; + + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +/** + * TX + */ +#define call_tx_stop_cbfn(tx, status)\ +do {\ + if ((tx)->stop_cbfn)\ + (tx)->stop_cbfn((tx)->stop_cbarg, (tx), status);\ + (tx)->stop_cbfn = NULL;\ + (tx)->stop_cbarg = NULL;\ +} while (0) + +#define call_tx_prio_change_cbfn(tx, status)\ +do {\ + if ((tx)->prio_change_cbfn)\ + (tx)->prio_change_cbfn((tx)->bna->bnad, (tx), status);\ + (tx)->prio_change_cbfn = NULL;\ +} while (0) + +static void bna_tx_mod_cb_tx_stopped(void *tx_mod, struct bna_tx *tx, + enum bna_cb_status status); +static void bna_tx_cb_txq_stopped(void *arg, int status); +static void bna_tx_cb_stats_cleared(void *arg, int status); +static void __bna_tx_stop(struct bna_tx *tx); +static void __bna_tx_start(struct bna_tx *tx); +static void __bna_txf_stat_clr(struct bna_tx *tx); + +enum bna_tx_event { + TX_E_START = 1, + TX_E_STOP = 2, + TX_E_FAIL = 3, + TX_E_TXQ_STOPPED = 4, + TX_E_PRIO_CHANGE = 5, + TX_E_STAT_CLEARED = 6, +}; + +enum bna_tx_state { + BNA_TX_STOPPED = 1, + BNA_TX_STARTED = 2, + BNA_TX_TXQ_STOP_WAIT = 3, + BNA_TX_PRIO_STOP_WAIT = 4, + BNA_TX_STAT_CLR_WAIT = 5, +}; + +bfa_fsm_state_decl(bna_tx, stopped, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, started, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, txq_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, prio_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, stat_clr_wait, struct bna_tx, + enum bna_tx_event); + +static struct bfa_sm_table tx_sm_table[] = { + {BFA_SM(bna_tx_sm_stopped), BNA_TX_STOPPED}, + {BFA_SM(bna_tx_sm_started), BNA_TX_STARTED}, + {BFA_SM(bna_tx_sm_txq_stop_wait), BNA_TX_TXQ_STOP_WAIT}, + {BFA_SM(bna_tx_sm_prio_stop_wait), BNA_TX_PRIO_STOP_WAIT}, + {BFA_SM(bna_tx_sm_stat_clr_wait), BNA_TX_STAT_CLR_WAIT}, +}; + +static void +bna_tx_sm_stopped_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + + call_tx_stop_cbfn(tx, BNA_CB_SUCCESS); +} + +static void +bna_tx_sm_stopped(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_START: + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_FAIL: + /* No-op */ + break; + + case TX_E_PRIO_CHANGE: + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + break; + + case TX_E_TXQ_STOPPED: + /** + * This event is received due to flushing of the mbox + * when the device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_started_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + __bna_tx_start(tx); + + /* Start IB */ + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_ack(&txq->ib->door_bell, 0); + } +} + +static void +bna_tx_sm_started(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + __bna_tx_stop(tx); + break; + + case TX_E_FAIL: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_fail(txq->ib); + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + bfa_fsm_set_state(tx,
bna_tx_sm_stopped); + break; + + case TX_E_PRIO_CHANGE: + bfa_fsm_set_state(tx, bna_tx_sm_prio_stop_wait); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_txq_stop_wait_entry(struct bna_tx *tx) +{ +} + +static void +bna_tx_sm_txq_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_FAIL: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + } + bfa_fsm_set_state(tx, bna_tx_sm_stat_clr_wait); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_prio_stop_wait_entry(struct bna_tx *tx) +{ + __bna_tx_stop(tx); +} + +static void +bna_tx_sm_prio_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + break; + + case TX_E_FAIL: + call_tx_prio_change_cbfn(tx, BNA_CB_FAIL); + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_stat_clr_wait_entry(struct bna_tx *tx) +{ + __bna_txf_stat_clr(tx); +} + +static void +bna_tx_sm_stat_clr_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_FAIL: + case TX_E_STAT_CLEARED: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +__bna_txq_start(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bna_rxtx_q_mem *q_mem; + struct bna_txq_mem txq_cfg; + struct bna_txq_mem *txq_mem; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + void __iomem *base_addr; + unsigned long off; + + /* Fill out structure, to be subsequently written to hardware */ + txq_cfg.pg_tbl_addr_lo = txq->qpt.hw_qpt_ptr.lsb; + txq_cfg.pg_tbl_addr_hi = txq->qpt.hw_qpt_ptr.msb; + cur_q_addr = *((struct bna_dma_addr *)(txq->qpt.kv_qpt_ptr)); + txq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + txq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + txq_cfg.pg_cnt_n_prd_ptr = (txq->qpt.page_count << 16) | 0x0; + + txq_cfg.entry_n_pg_size = ((u32)(BFI_TXQ_WI_SIZE >> 2) << 16) | + (txq->qpt.page_size >> 2); + txq_cfg.int_blk_n_cns_ptr = ((((u32)txq->ib_seg_offset) << 24) | + ((u32)(txq->ib->ib_id & 0xff) << 16) | 0x0); + + txq_cfg.cns_ptr2_n_q_state = BNA_Q_IDLE_STATE; + txq_cfg.nxt_qid_n_fid_n_pri = (((tx->txf.txf_id & 0x3f) << 3) | + (txq->priority & 0x3)); + txq_cfg.wvc_n_cquota_n_rquota = + ((((u32)BFI_TX_MAX_WRR_QUOTA & 0xfff) << 12) | + (BFI_TX_MAX_WRR_QUOTA & 0xfff)); + + /* Setup the page and write to H/W */ + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + tx->bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + q_mem = (struct bna_rxtx_q_mem *)0; + txq_mem = &q_mem[txq->txq_id].txq; + + /* + * The following 4 lines, is a hack b'cos the H/W needs to read + * these DMA addresses as little endian + */ + + off = (unsigned 
long)&txq_mem->pg_tbl_addr_lo; + writel(htonl(txq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&txq_mem->pg_tbl_addr_hi; + writel(htonl(txq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_lo; + writel(htonl(txq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_hi; + writel(htonl(txq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&txq_mem->pg_cnt_n_prd_ptr; + writel(txq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->entry_n_pg_size; + writel(txq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&txq_mem->int_blk_n_cns_ptr; + writel(txq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->cns_ptr2_n_q_state; + writel(txq_cfg.cns_ptr2_n_q_state, base_addr + off); + + off = (unsigned long)&txq_mem->nxt_qid_n_fid_n_pri; + writel(txq_cfg.nxt_qid_n_fid_n_pri, base_addr + off); + + off = (unsigned long)&txq_mem->wvc_n_cquota_n_rquota; + writel(txq_cfg.wvc_n_cquota_n_rquota, base_addr + off); + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + *(txq->tcb->hw_consumer_index) = 0; + +} + +static void +__bna_txq_stop(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bfi_ll_q_stop_req ll_req; + u32 bit_mask[2] = {0, 0}; + if (txq->txq_id < 32) + bit_mask[0] = (u32)1 << txq->txq_id; + else + bit_mask[1] = (u32)1 << (txq->txq_id - 32); + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_TXQ_STOP_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + ll_req.q_id_mask[0] = htonl(bit_mask[0]); + ll_req.q_id_mask[1] = htonl(bit_mask[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_txq_stopped, tx); + + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_txf_start(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET), + tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + writel(((u32)txf->vlan << 16) | txf->ctrl_flags, + base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] |= ((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[1] |= ((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stop(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + u32 page_num; + u32 ctl_flags; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + /* retrieve the running txf_flags & turn off enable bit */ + page_num = BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET); + writel(page_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + ctl_flags = readl(base_addr + off); + ctl_flags &= ~BFI_TXF_CF_ENABLE; + + writel(ctl_flags, base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] &= ~((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[1] &= ~((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stat_clr(struct bna_tx *tx) +{ + struct
bfi_ll_stats_req ll_req; + u32 txf_bmap[2] = {0, 0}; + if (tx->txf.txf_id < 32) + txf_bmap[0] = ((u32)1 << tx->txf.txf_id); + else + txf_bmap[1] = ((u32)1 << (tx->txf.txf_id - 32)); + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = 0; + ll_req.rxf_id_mask[0] = 0; + ll_req.rxf_id_mask[1] = 0; + ll_req.txf_id_mask[0] = htonl(txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(txf_bmap[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_stats_cleared, tx); + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_tx_start(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_start(txq->ib); + __bna_txq_start(tx, txq); + } + + __bna_txf_start(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->tcb->priority = txq->priority; + (tx->tx_resume_cbfn)(tx->bna->bnad, txq->tcb); + } +} + +static void +__bna_tx_stop(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + + __bna_txf_stop(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bfa_wc_up(&tx->txq_stop_wc); + } + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + __bna_txq_stop(tx, txq); + } +} + +static void +bna_txq_qpt_setup(struct bna_txq *txq, int page_count, int page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + txq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + txq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + txq->qpt.kv_qpt_ptr = qpt_mem->kva; + txq->qpt.page_count = page_count; + txq->qpt.page_size = page_size; + + txq->tcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < page_count; i++) { + txq->tcb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +static void +bna_tx_free(struct bna_tx *tx) +{ + struct bna_tx_mod *tx_mod = &tx->bna->tx_mod; + struct bna_txq *txq; + struct bna_ib_mod *ib_mod = &tx->bna->ib_mod; + struct list_head *qe; + + while (!list_empty(&tx->txq_q)) { + bfa_q_deq(&tx->txq_q, &txq); + bfa_q_qe_init(&txq->qe); + if (txq->ib) { + if (txq->ib_seg_offset != -1) + bna_ib_release_idx(txq->ib, + txq->ib_seg_offset); + bna_ib_put(ib_mod, txq->ib); + txq->ib = NULL; + } + txq->tcb = NULL; + txq->tx = NULL; + list_add_tail(&txq->qe, &tx_mod->txq_free_q); + } + + list_for_each(qe, &tx_mod->tx_active_q) { + if (qe == &tx->qe) { + list_del(&tx->qe); + bfa_q_qe_init(&tx->qe); + break; + } + } + + tx->bna = NULL; + tx->priv = NULL; + list_add_tail(&tx->qe, &tx_mod->tx_free_q); +} + +static void +bna_tx_cb_txq_stopped(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + bfa_wc_down(&tx->txq_stop_wc); +} + +static void +bna_tx_cb_txq_stopped_all(void *arg) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_fsm_send_event(tx, TX_E_TXQ_STOPPED); +} + +static void +bna_tx_cb_stats_cleared(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_send_event(tx, TX_E_STAT_CLEARED); +} + +static void +bna_tx_start(struct bna_tx *tx) +{ + tx->flags |= BNA_TX_F_PORT_STARTED; + if (tx->flags & BNA_TX_F_ENABLED) + bfa_fsm_send_event(tx, TX_E_START); +} 
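+/* + * A Tx actually starts only when both gates are open: the port side + * sets BNA_TX_F_PORT_STARTED (bna_tx_start() above) and bnad sets + * BNA_TX_F_ENABLED (bna_tx_enable() below); whichever flag arrives + * last raises TX_E_START. In sketch form, for the enable-then-start + * order: + * + * bna_tx_enable(tx); - sets BNA_TX_F_ENABLED + * bna_tx_mod_start(tx_mod, BNA_TX_T_REGULAR); + * - bna_tx_start(tx) -> TX_E_START + */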
+ +static void +bna_tx_stop(struct bna_tx *tx) +{ + tx->stop_cbfn = bna_tx_mod_cb_tx_stopped; + tx->stop_cbarg = &tx->bna->tx_mod; + + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_STOP); +} + +static void +bna_tx_fail(struct bna_tx *tx) +{ + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_FAIL); +} + +void +bna_tx_prio_changed(struct bna_tx *tx, int prio) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = prio; + } + + bfa_fsm_send_event(tx, TX_E_PRIO_CHANGE); +} + +static void +bna_tx_cee_link_status(struct bna_tx *tx, int cee_link) +{ + if (cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + else + tx->flags &= ~BNA_TX_F_PRIO_LOCK; +} + +static void +bna_tx_mod_cb_tx_stopped(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + bfa_wc_down(&tx_mod->tx_stop_wc); +} + +static void +bna_tx_mod_cb_tx_stopped_all(void *arg) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + if (tx_mod->stop_cbfn) + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; +} + +void +bna_tx_res_req(int num_txq, int txq_depth, struct bna_res_info *res_info) +{ + u32 q_size; + u32 page_count; + struct bna_mem_info *mem_info; + + res_info[BNA_TX_RES_MEM_T_TCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_tcb); + mem_info->num = num_txq; + + q_size = txq_depth * BFI_TXQ_WI_SIZE; + q_size = ALIGN(q_size, PAGE_SIZE); + page_count = q_size >> PAGE_SHIFT; + + res_info[BNA_TX_RES_MEM_T_QPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = page_count * sizeof(struct bna_dma_addr); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_SWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = page_count * sizeof(void *); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = num_txq * page_count; + + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_type = BNA_RES_T_INTR; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.num = num_txq; +} + +struct bna_tx * +bna_tx_create(struct bna *bna, struct bnad *bnad, + struct bna_tx_config *tx_cfg, + struct bna_tx_event_cbfn *tx_cbfn, + struct bna_res_info *res_info, void *priv) +{ + struct bna_intr_info *intr_info; + struct bna_tx_mod *tx_mod = &bna->tx_mod; + struct bna_tx *tx; + struct bna_txq *txq; + struct list_head *qe; + struct bna_ib_mod *ib_mod = &bna->ib_mod; + struct bna_doorbell_qset *qset; + struct bna_ib_config ib_config; + int page_count; + int page_size; + int page_idx; + int i; + unsigned long off; + + intr_info = &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + page_count = (res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.num) / + tx_cfg->num_txq; + page_size = res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.len; + + /** + * Get resources + */ + + if ((intr_info->num != 1) && (intr_info->num != tx_cfg->num_txq)) + return NULL; + + /* Tx */ + + if (list_empty(&tx_mod->tx_free_q)) + 
return NULL; + bfa_q_deq(&tx_mod->tx_free_q, &tx); + bfa_q_qe_init(&tx->qe); + + /* TxQs */ + + INIT_LIST_HEAD(&tx->txq_q); + for (i = 0; i < tx_cfg->num_txq; i++) { + if (list_empty(&tx_mod->txq_free_q)) + goto err_return; + + bfa_q_deq(&tx_mod->txq_free_q, &txq); + bfa_q_qe_init(&txq->qe); + list_add_tail(&txq->qe, &tx->txq_q); + txq->ib = NULL; + txq->ib_seg_offset = -1; + txq->tx = tx; + } + + /* IBs */ + i = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + + if (intr_info->num == 1) + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[0].vector); + else + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[i].vector); + + if (txq->ib == NULL) + goto err_return; + + txq->ib_seg_offset = bna_ib_reserve_idx(txq->ib); + if (txq->ib_seg_offset == -1) + goto err_return; + + i++; + } + + /* + * Initialize + */ + + /* Tx */ + + tx->tcb_setup_cbfn = tx_cbfn->tcb_setup_cbfn; + tx->tcb_destroy_cbfn = tx_cbfn->tcb_destroy_cbfn; + /* Following callbacks are mandatory */ + tx->tx_stall_cbfn = tx_cbfn->tx_stall_cbfn; + tx->tx_resume_cbfn = tx_cbfn->tx_resume_cbfn; + tx->tx_cleanup_cbfn = tx_cbfn->tx_cleanup_cbfn; + + list_add_tail(&tx->qe, &tx_mod->tx_active_q); + tx->bna = bna; + tx->priv = priv; + tx->txq_stop_wc.wc_resume = bna_tx_cb_txq_stopped_all; + tx->txq_stop_wc.wc_cbarg = tx; + tx->txq_stop_wc.wc_count = 0; + + tx->type = tx_cfg->tx_type; + + tx->flags = 0; + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_STARTED) { + switch (tx->type) { + case BNA_TX_T_REGULAR: + if (!(tx->bna->tx_mod.flags & + BNA_TX_MOD_F_PORT_LOOPBACK)) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + case BNA_TX_T_LOOPBACK: + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_LOOPBACK) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + } + } + if (tx->bna->tx_mod.cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + + /* TxQ */ + + i = 0; + page_idx = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = tx_mod->priority; + txq->tcb = (struct bna_tcb *) + res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info.mdl[i].kva; + txq->tx_packets = 0; + txq->tx_bytes = 0; + + /* IB */ + + ib_config.coalescing_timeo = BFI_TX_COALESCING_TIMEO; + ib_config.interpkt_timeo = 0; /* Not used */ + ib_config.interpkt_count = BFI_TX_INTERPKT_COUNT; + ib_config.ctrl_flags = (BFI_IB_CF_INTER_PKT_DMA | + BFI_IB_CF_INT_ENABLE | + BFI_IB_CF_COALESCING_MODE); + bna_ib_config(txq->ib, &ib_config); + + /* TCB */ + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + txq->tcb->hw_consumer_index = (volatile u32 *) + ((volatile u8 *)txq->ib->ib_seg_host_addr_kva + + (txq->ib_seg_offset * BFI_IBIDX_SIZE)); + *(txq->tcb->hw_consumer_index) = 0; + txq->tcb->q_depth = tx_cfg->txq_depth; + txq->tcb->unmap_q = (void *) + res_info[BNA_TX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[i].kva; + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)&qset[txq->txq_id].txq[0]; + txq->tcb->q_dbell = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + txq->tcb->i_dbell = &txq->ib->door_bell; + txq->tcb->intr_type = intr_info->intr_type; + txq->tcb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + txq->tcb->txq = txq; + txq->tcb->bnad = bnad; + txq->tcb->id = i; + + /* QPT, SWQPT, Pages */ + bna_txq_qpt_setup(txq, page_count, page_size, + &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_PAGE]. 
+ res_u.mem_info.mdl[page_idx]); + txq->tcb->page_idx = page_idx; + txq->tcb->page_count = page_count; + page_idx += page_count; + + /* Callback to bnad for setting up TCB */ + if (tx->tcb_setup_cbfn) + (tx->tcb_setup_cbfn)(bna->bnad, txq->tcb); + + i++; + } + + /* TxF */ + + tx->txf.ctrl_flags = BFI_TXF_CF_ENABLE | BFI_TXF_CF_VLAN_WI_BASED; + tx->txf.vlan = 0; + + /* Mbox element */ + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + + return tx; + +err_return: + bna_tx_free(tx); + return NULL; +} + +void +bna_tx_destroy(struct bna_tx *tx) +{ + /* Callback to bnad for destroying TCB */ + if (tx->tcb_destroy_cbfn) { + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tcb_destroy_cbfn)(tx->bna->bnad, txq->tcb); + } + } + + bna_tx_free(tx); +} + +void +bna_tx_enable(struct bna_tx *tx) +{ + if (tx->fsm != (bfa_sm_t)bna_tx_sm_stopped) + return; + + tx->flags |= BNA_TX_F_ENABLED; + + if (tx->flags & BNA_TX_F_PORT_STARTED) + bfa_fsm_send_event(tx, TX_E_START); +} + +void +bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_tx *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(tx->bna->bnad, tx, BNA_CB_SUCCESS); + return; + } + + tx->stop_cbfn = cbfn; + tx->stop_cbarg = tx->bna->bnad; + + tx->flags &= ~BNA_TX_F_ENABLED; + + bfa_fsm_send_event(tx, TX_E_STOP); +} + +int +bna_tx_state_get(struct bna_tx *tx) +{ + return bfa_sm_to_state(tx_sm_table, tx->fsm); +} + +void +bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + tx_mod->bna = bna; + tx_mod->flags = 0; + + tx_mod->tx = (struct bna_tx *) + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mdl[0].kva; + tx_mod->txq = (struct bna_txq *) + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&tx_mod->tx_free_q); + INIT_LIST_HEAD(&tx_mod->tx_active_q); + + INIT_LIST_HEAD(&tx_mod->txq_free_q); + + for (i = 0; i < BFI_MAX_TXQ; i++) { + tx_mod->tx[i].txf.txf_id = i; + bfa_q_qe_init(&tx_mod->tx[i].qe); + list_add_tail(&tx_mod->tx[i].qe, &tx_mod->tx_free_q); + + tx_mod->txq[i].txq_id = i; + bfa_q_qe_init(&tx_mod->txq[i].qe); + list_add_tail(&tx_mod->txq[i].qe, &tx_mod->txq_free_q); + } + + tx_mod->tx_stop_wc.wc_resume = bna_tx_mod_cb_tx_stopped_all; + tx_mod->tx_stop_wc.wc_cbarg = tx_mod; + tx_mod->tx_stop_wc.wc_count = 0; +} + +void +bna_tx_mod_uninit(struct bna_tx_mod *tx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &tx_mod->tx_free_q) + i++; + + i = 0; + list_for_each(qe, &tx_mod->txq_free_q) + i++; + + tx_mod->bna = NULL; +} + +void +bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags |= BNA_TX_MOD_F_PORT_STARTED; + if (type == BNA_TX_T_LOOPBACK) + tx_mod->flags |= BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_start(tx); + } +} + +void +bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + tx_mod->stop_cbfn = bna_port_cb_tx_stopped; + + /** + * Before calling bna_tx_stop(), increment tx_stop_wc as many times + * as we are going to call bna_tx_stop + */ + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + 
bfa_wc_up(&tx_mod->tx_stop_wc); + } + + if (tx_mod->tx_stop_wc.wc_count == 0) { + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_stop(tx); + } +} + +void +bna_tx_mod_fail(struct bna_tx_mod *tx_mod) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_fail(tx); + } +} + +void +bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio) +{ + struct bna_tx *tx; + struct list_head *qe; + + if (prio != tx_mod->priority) { + tx_mod->priority = prio; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_prio_changed(tx, prio); + } + } +} + +void +bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->cee_link = cee_link; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_cee_link_status(tx, cee_link); + } +} diff --git a/drivers/net/bna/bna_types.h b/drivers/net/bna/bna_types.h new file mode 100644 index 000000000000..6877310f6ef4 --- /dev/null +++ b/drivers/net/bna/bna_types.h @@ -0,0 +1,1128 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ +#ifndef __BNA_TYPES_H__ +#define __BNA_TYPES_H__ + +#include "cna.h" +#include "bna_hw.h" +#include "bfa_cee.h" + +/** + * + * Forward declarations + * + */ + +struct bna_txq; +struct bna_tx; +struct bna_rxq; +struct bna_cq; +struct bna_rx; +struct bna_rxf; +struct bna_port; +struct bna; +struct bnad; + +/** + * + * Enums, primitive data types + * + */ + +enum bna_status { + BNA_STATUS_T_DISABLED = 0, + BNA_STATUS_T_ENABLED = 1 +}; + +enum bna_cleanup_type { + BNA_HARD_CLEANUP = 0, + BNA_SOFT_CLEANUP = 1 +}; + +enum bna_cb_status { + BNA_CB_SUCCESS = 0, + BNA_CB_FAIL = 1, + BNA_CB_INTERRUPT = 2, + BNA_CB_BUSY = 3, + BNA_CB_INVALID_MAC = 4, + BNA_CB_MCAST_LIST_FULL = 5, + BNA_CB_UCAST_CAM_FULL = 6, + BNA_CB_WAITING = 7, + BNA_CB_NOT_EXEC = 8 +}; + +enum bna_res_type { + BNA_RES_T_MEM = 1, + BNA_RES_T_INTR = 2 +}; + +enum bna_mem_type { + BNA_MEM_T_KVA = 1, + BNA_MEM_T_DMA = 2 +}; + +enum bna_intr_type { + BNA_INTR_T_INTX = 1, + BNA_INTR_T_MSIX = 2 +}; + +enum bna_res_req_type { + BNA_RES_MEM_T_COM = 0, + BNA_RES_MEM_T_ATTR = 1, + BNA_RES_MEM_T_FWTRC = 2, + BNA_RES_MEM_T_STATS = 3, + BNA_RES_MEM_T_SWSTATS = 4, + BNA_RES_MEM_T_IBIDX = 5, + BNA_RES_MEM_T_IB_ARRAY = 6, + BNA_RES_MEM_T_INTR_ARRAY = 7, + BNA_RES_MEM_T_IDXSEG_ARRAY = 8, + BNA_RES_MEM_T_TX_ARRAY = 9, + BNA_RES_MEM_T_TXQ_ARRAY = 10, + BNA_RES_MEM_T_RX_ARRAY = 11, + BNA_RES_MEM_T_RXP_ARRAY = 12, + BNA_RES_MEM_T_RXQ_ARRAY = 13, + BNA_RES_MEM_T_UCMAC_ARRAY = 14, + BNA_RES_MEM_T_MCMAC_ARRAY = 15, + BNA_RES_MEM_T_RIT_ENTRY = 16, + BNA_RES_MEM_T_RIT_SEGMENT = 17, + BNA_RES_INTR_T_MBOX = 18, + BNA_RES_T_MAX +}; + +enum bna_tx_res_req_type { + BNA_TX_RES_MEM_T_TCB = 0, + BNA_TX_RES_MEM_T_UNMAPQ = 1, + BNA_TX_RES_MEM_T_QPT = 2, + BNA_TX_RES_MEM_T_SWQPT = 3, + BNA_TX_RES_MEM_T_PAGE = 4, + BNA_TX_RES_INTR_T_TXCMPL = 5, + BNA_TX_RES_T_MAX, +}; + +enum bna_rx_mem_type { + BNA_RX_RES_MEM_T_CCB = 0, /* CQ context */ + BNA_RX_RES_MEM_T_RCB = 1, /* CQ context */ + BNA_RX_RES_MEM_T_UNMAPQ = 2, /* UnmapQ for RxQs */ + BNA_RX_RES_MEM_T_CQPT = 3, /* CQ QPT */ + BNA_RX_RES_MEM_T_CSWQPT = 4, /* S/W QPT */ + BNA_RX_RES_MEM_T_CQPT_PAGE = 5, /* CQPT page */ + BNA_RX_RES_MEM_T_HQPT = 6, /* RX QPT */ + BNA_RX_RES_MEM_T_DQPT = 7, /* RX QPT */ + BNA_RX_RES_MEM_T_HSWQPT = 8, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DSWQPT = 9, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DPAGE = 10, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_HPAGE = 11, /* RX s/w QPT */ + BNA_RX_RES_T_INTR = 12, /* Rx interrupts */ + BNA_RX_RES_T_MAX = 13 +}; + +enum bna_mbox_state { + BNA_MBOX_FREE = 0, + BNA_MBOX_POSTED = 1 +}; + +enum bna_tx_type { + BNA_TX_T_REGULAR = 0, + BNA_TX_T_LOOPBACK = 1, +}; + +enum bna_tx_flags { + BNA_TX_F_PORT_STARTED = 1, + BNA_TX_F_ENABLED = 2, + BNA_TX_F_PRIO_LOCK = 4, +}; + +enum bna_tx_mod_flags { + BNA_TX_MOD_F_PORT_STARTED = 1, + BNA_TX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rx_type { + BNA_RX_T_REGULAR = 0, + BNA_RX_T_LOOPBACK = 1, +}; + +enum bna_rxp_type { + BNA_RXP_SINGLE = 1, + BNA_RXP_SLR = 2, + BNA_RXP_HDS = 3 +}; + +enum bna_rxmode { + BNA_RXMODE_PROMISC = 1, + BNA_RXMODE_DEFAULT = 2, + BNA_RXMODE_ALLMULTI = 4 +}; + +enum bna_rx_event { + RX_E_START = 1, + RX_E_STOP = 2, + RX_E_FAIL = 3, + RX_E_RXF_STARTED = 4, + RX_E_RXF_STOPPED = 5, + RX_E_RXQ_STOPPED = 6, +}; + +enum bna_rx_state { + BNA_RX_STOPPED = 1, + BNA_RX_RXF_START_WAIT = 2, + BNA_RX_STARTED = 3, + BNA_RX_RXF_STOP_WAIT = 4, + BNA_RX_RXQ_STOP_WAIT = 5, +}; + +enum bna_rx_flags { + BNA_RX_F_ENABLE = 0x01, /* bnad enabled rxf */ + BNA_RX_F_PORT_ENABLED = 0x02, /* 
Port object is enabled */ + BNA_RX_F_PORT_FAILED = 0x04, /* Port in failed state */ +}; + +enum bna_rx_mod_flags { + BNA_RX_MOD_F_PORT_STARTED = 1, + BNA_RX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rxf_oper_state { + BNA_RXF_OPER_STATE_RUNNING = 0x01, /* rxf operational */ + BNA_RXF_OPER_STATE_PAUSED = 0x02, /* rxf in PAUSED state */ +}; + +enum bna_rxf_flags { + BNA_RXF_FL_STOP_PENDING = 0x01, + BNA_RXF_FL_FAILED = 0x02, + BNA_RXF_FL_RSS_CONFIG_PENDING = 0x04, + BNA_RXF_FL_OPERSTATE_CHANGED = 0x08, + BNA_RXF_FL_RXF_ENABLED = 0x10, + BNA_RXF_FL_VLAN_CONFIG_PENDING = 0x20, +}; + +enum bna_rxf_event { + RXF_E_START = 1, + RXF_E_STOP = 2, + RXF_E_FAIL = 3, + RXF_E_CAM_FLTR_MOD = 4, + RXF_E_STARTED = 5, + RXF_E_STOPPED = 6, + RXF_E_CAM_FLTR_RESP = 7, + RXF_E_PAUSE = 8, + RXF_E_RESUME = 9, + RXF_E_STAT_CLEARED = 10, +}; + +enum bna_rxf_state { + BNA_RXF_STOPPED = 1, + BNA_RXF_START_WAIT = 2, + BNA_RXF_CAM_FLTR_MOD_WAIT = 3, + BNA_RXF_STARTED = 4, + BNA_RXF_CAM_FLTR_CLR_WAIT = 5, + BNA_RXF_STOP_WAIT = 6, + BNA_RXF_PAUSE_WAIT = 7, + BNA_RXF_RESUME_WAIT = 8, + BNA_RXF_STAT_CLR_WAIT = 9, +}; + +enum bna_port_type { + BNA_PORT_T_REGULAR = 0, + BNA_PORT_T_LOOPBACK_INTERNAL = 1, + BNA_PORT_T_LOOPBACK_EXTERNAL = 2, +}; + +enum bna_link_status { + BNA_LINK_DOWN = 0, + BNA_LINK_UP = 1, + BNA_CEE_UP = 2 +}; + +enum bna_llport_flags { + BNA_LLPORT_F_ENABLED = 1, + BNA_LLPORT_F_RX_ENABLED = 2 +}; + +enum bna_port_flags { + BNA_PORT_F_DEVICE_READY = 1, + BNA_PORT_F_ENABLED = 2, + BNA_PORT_F_PAUSE_CHANGED = 4, + BNA_PORT_F_MTU_CHANGED = 8 +}; + +enum bna_pkt_rates { + BNA_PKT_RATE_10K = 10000, + BNA_PKT_RATE_20K = 20000, + BNA_PKT_RATE_30K = 30000, + BNA_PKT_RATE_40K = 40000, + BNA_PKT_RATE_50K = 50000, + BNA_PKT_RATE_60K = 60000, + BNA_PKT_RATE_70K = 70000, + BNA_PKT_RATE_80K = 80000, +}; + +enum bna_dim_load_types { + BNA_LOAD_T_HIGH_4 = 0, /* 80K <= r */ + BNA_LOAD_T_HIGH_3 = 1, /* 60K <= r < 80K */ + BNA_LOAD_T_HIGH_2 = 2, /* 50K <= r < 60K */ + BNA_LOAD_T_HIGH_1 = 3, /* 40K <= r < 50K */ + BNA_LOAD_T_LOW_1 = 4, /* 30K <= r < 40K */ + BNA_LOAD_T_LOW_2 = 5, /* 20K <= r < 30K */ + BNA_LOAD_T_LOW_3 = 6, /* 10K <= r < 20K */ + BNA_LOAD_T_LOW_4 = 7, /* r < 10K */ + BNA_LOAD_T_MAX = 8 +}; + +enum bna_dim_bias_types { + BNA_BIAS_T_SMALL = 0, /* small pkts > (large pkts * 2) */ + BNA_BIAS_T_LARGE = 1, /* Not BNA_BIAS_T_SMALL */ + BNA_BIAS_T_MAX = 2 +}; + +struct bna_mac { + /* This should be the first one */ + struct list_head qe; + u8 addr[ETH_ALEN]; +}; + +struct bna_mem_descr { + u32 len; + void *kva; + struct bna_dma_addr dma; +}; + +struct bna_mem_info { + enum bna_mem_type mem_type; + u32 len; + u32 num; + u32 align_sz; /* 0/1 = no alignment */ + struct bna_mem_descr *mdl; + void *cookie; /* For bnad to unmap dma later */ +}; + +struct bna_intr_descr { + int vector; +}; + +struct bna_intr_info { + enum bna_intr_type intr_type; + int num; + struct bna_intr_descr *idl; +}; + +union bna_res_u { + struct bna_mem_info mem_info; + struct bna_intr_info intr_info; +}; + +struct bna_res_info { + enum bna_res_type res_type; + union bna_res_u res_u; +}; + +/* HW QPT */ +struct bna_qpt { + struct bna_dma_addr hw_qpt_ptr; + void *kv_qpt_ptr; + u32 page_count; + u32 page_size; +}; + +/** + * + * Device + * + */ + +struct bna_device { + bfa_fsm_t fsm; + struct bfa_ioc ioc; + + enum bna_intr_type intr_type; + int vector; + + void (*ready_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *ready_cbarg; + + void (*stop_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *stop_cbarg; + + struct 
bna *bna; +}; + +/** + * + * Mail box + * + */ + +struct bna_mbox_qe { + /* This should be the first one */ + struct list_head qe; + + struct bfa_mbox_cmd cmd; + u32 cmd_len; + /* Callback for port, tx, rx, rxf */ + void (*cbfn)(void *arg, int status); + void *cbarg; +}; + +struct bna_mbox_mod { + enum bna_mbox_state state; + struct list_head posted_q; + u32 msg_pending; + u32 msg_ctr; + struct bna *bna; +}; + +/** + * + * Port + * + */ + +/* Pause configuration */ +struct bna_pause_config { + enum bna_status tx_pause; + enum bna_status rx_pause; +}; + +struct bna_llport { + bfa_fsm_t fsm; + enum bna_llport_flags flags; + + enum bna_port_type type; + + enum bna_link_status link_status; + + int admin_up_count; + + void (*stop_cbfn)(struct bna_port *, enum bna_cb_status); + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +struct bna_port { + bfa_fsm_t fsm; + enum bna_port_flags flags; + + enum bna_port_type type; + + struct bna_llport llport; + + struct bna_pause_config pause_config; + u8 priority; + int mtu; + + /* Callback for bna_port_disable(), port_stop() */ + void (*stop_cbfn)(void *, enum bna_cb_status); + void *stop_cbarg; + + /* Callback for bna_port_pause_config() */ + void (*pause_cbfn)(struct bnad *, enum bna_cb_status); + + /* Callback for bna_port_mtu_set() */ + void (*mtu_cbfn)(struct bnad *, enum bna_cb_status); + + void (*link_cbfn)(struct bnad *, enum bna_link_status); + + struct bfa_wc chld_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +/** + * + * Interrupt Block + * + */ + +/* IB index segment structure */ +struct bna_ibidx_seg { + /* This should be the first one */ + struct list_head qe; + + u8 ib_seg_size; + u8 ib_idx_tbl_offset; +}; + +/* Interrupt structure */ +struct bna_intr { + /* This should be the first one */ + struct list_head qe; + int ref_count; + + enum bna_intr_type intr_type; + int vector; + + struct bna_ib *ib; +}; + +/* Doorbell structure */ +struct bna_ib_dbell { + void *__iomem doorbell_addr; + u32 doorbell_ack; +}; + +/* Interrupt timer configuration */ +struct bna_ib_config { + u8 coalescing_timeo; /* Unit is 5usec. 
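+				 * e.g. a value of 8 gives a 40 usec interval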
*/ + + int interpkt_count; + int interpkt_timeo; + + enum ib_flags ctrl_flags; +}; + +/* IB structure */ +struct bna_ib { + /* This should be the first one */ + struct list_head qe; + + int ib_id; + + int ref_count; + int start_count; + + struct bna_dma_addr ib_seg_host_addr; + void *ib_seg_host_addr_kva; + u32 idx_mask; /* Size >= BNA_IBIDX_MAX_SEGSIZE */ + + struct bna_ibidx_seg *idx_seg; + + struct bna_ib_dbell door_bell; + + struct bna_intr *intr; + + struct bna_ib_config ib_config; + + struct bna *bna; +}; + +/* IB module - keeps track of IBs and interrupts */ +struct bna_ib_mod { + struct bna_ib *ib; /* BFI_MAX_IB entries */ + struct bna_intr *intr; /* BFI_MAX_IB entries */ + struct bna_ibidx_seg *idx_seg; /* BNA_IBIDX_TOTAL_SEGS */ + + struct list_head ib_free_q; + + struct list_head ibidx_seg_pool[BFI_IBIDX_TOTAL_POOLS]; + + struct list_head intr_free_q; + struct list_head intr_active_q; + + struct bna *bna; +}; + +/** + * + * Tx object + * + */ + +/* Tx datapath control structure */ +#define BNA_Q_NAME_SIZE 16 +struct bna_tcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + volatile u32 *hw_consumer_index; + u32 q_depth; + void *__iomem q_dbell; + struct bna_ib_dbell *i_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_txq *txq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 priority; /* Current priority */ + unsigned long flags; /* Used by bnad as required */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* TxQ QPT and configuration */ +struct bna_txq { + /* This should be the first one */ + struct list_head qe; + + int txq_id; + + u8 priority; + + struct bna_qpt qpt; + struct bna_tcb *tcb; + struct bna_ib *ib; + int ib_seg_offset; + + struct bna_tx *tx; + + u64 tx_packets; + u64 tx_bytes; +}; + +/* TxF structure (hardware Tx Function) */ +struct bna_txf { + int txf_id; + enum txf_flags ctrl_flags; + u16 vlan; +}; + +/* Tx object */ +struct bna_tx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + enum bna_tx_flags flags; + + enum bna_tx_type type; + + struct list_head txq_q; + struct bna_txf txf; + + /* Tx event handlers */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); + + /* callback for bna_tx_disable(), bna_tx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_tx *tx, + enum bna_cb_status status); + void *stop_cbarg; + + /* callback for bna_tx_prio_set() */ + void (*prio_change_cbfn)(struct bnad *bnad, struct bna_tx *tx, + enum bna_cb_status status); + + struct bfa_wc txq_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_tx_config { + int num_txq; + int txq_depth; + enum bna_tx_type tx_type; +}; + +struct bna_tx_event_cbfn { + /* Optional */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + /* Mandatory */ + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); +}; + +/* Tx module - keeps track of free, active tx objects */ +struct bna_tx_mod { + struct bna_tx *tx; /* BFI_MAX_TXQ entries */ + struct bna_txq *txq; /* BFI_MAX_TXQ entries */ + 
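+	/* Free/active lists chain objects through their leading qe member */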
+ struct list_head tx_free_q; + struct list_head tx_active_q; + + struct list_head txq_free_q; + + /* callback for bna_tx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc tx_stop_wc; + + enum bna_tx_mod_flags flags; + + int priority; + int cee_link; + + u32 txf_bmap[2]; + + struct bna *bna; +}; + +/** + * + * Receive Indirection Table + * + */ + +/* One row of RIT table */ +struct bna_rit_entry { + u8 large_rxq_id; /* used for either large or data buffers */ + u8 small_rxq_id; /* used for either small or header buffers */ +}; + +/* RIT segment */ +struct bna_rit_segment { + struct list_head qe; + + u32 rit_offset; + u32 rit_size; + /** + * max_rit_size: Varies per RIT segment depending on how RIT is + * partitioned + */ + u32 max_rit_size; + + struct bna_rit_entry *rit; +}; + +struct bna_rit_mod { + struct bna_rit_entry *rit; + struct bna_rit_segment *rit_segment; + + struct list_head rit_seg_pool[BFI_RIT_SEG_TOTAL_POOLS]; +}; + +/** + * + * Rx object + * + */ + +/* Rx datapath control structure */ +struct bna_rcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + u32 q_depth; + void *__iomem q_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_rxq *rxq; + struct bna_cq *cq; + struct bnad *bnad; + unsigned long flags; + int id; +}; + +/* RxQ structure - QPT, configuration */ +struct bna_rxq { + struct list_head qe; + int rxq_id; + + int buffer_size; + int q_depth; + + struct bna_qpt qpt; + struct bna_rcb *rcb; + + struct bna_rxp *rxp; + struct bna_rx *rx; + + u64 rx_packets; + u64 rx_bytes; + u64 rx_packets_with_error; + u64 rxbuf_alloc_failed; +}; + +/* RxQ pair */ +union bna_rxq_u { + struct { + struct bna_rxq *hdr; + struct bna_rxq *data; + } hds; + struct { + struct bna_rxq *small; + struct bna_rxq *large; + } slr; + struct { + struct bna_rxq *only; + struct bna_rxq *reserved; + } single; +}; + +/* Packet rate for Dynamic Interrupt Moderation */ +struct bna_pkt_rate { + u32 small_pkt_cnt; + u32 large_pkt_cnt; +}; + +/* Completion control structure */ +struct bna_ccb { + /* Fast path */ + void **sw_qpt; + u32 producer_index; + volatile u32 *hw_producer_index; + u32 q_depth; + struct bna_ib_dbell *i_dbell; + struct bna_rcb *rcb[2]; + void *ctrl; /* For bnad */ + struct bna_pkt_rate pkt_rate; + int page_idx; + int page_count; + + /* Control path */ + struct bna_cq *cq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 rx_coalescing_timeo; /* For NAPI */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* CQ QPT, configuration */ +struct bna_cq { + int cq_id; + + struct bna_qpt qpt; + struct bna_ccb *ccb; + + struct bna_ib *ib; + u8 ib_seg_offset; + + struct bna_rx *rx; +}; + +struct bna_rss_config { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +struct bna_hds_config { + enum hds_header_type hdr_type; + int header_size; +}; + +/* This structure is used during RX creation */ +struct bna_rx_config { + enum bna_rx_type rx_type; + int num_paths; + enum bna_rxp_type rxp_type; + int paused; + int q_depth; + /* + * Small/Large (or Header/Data) buffer size to be configured + * for SLR and HDS queue type. Large buffer size comes from + * port->mtu. 
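+	 * The small/header size is supplied by the driver; bnad passes
+	 * BFI_SMALL_RXBUF_SIZE here.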
+ */ + int small_buff_size; + + enum bna_status rss_status; + struct bna_rss_config rss_config; + + enum bna_status hds_status; + struct bna_hds_config hds_config; + + enum bna_status vlan_strip_status; +}; + +/* Rx Path structure - one per MSIX vector/CPU */ +struct bna_rxp { + /* This should be the first one */ + struct list_head qe; + + enum bna_rxp_type type; + union bna_rxq_u rxq; + struct bna_cq cq; + + struct bna_rx *rx; + + /* MSI-x vector number for configuring RSS */ + int vector; + + struct bna_mbox_qe mbox_qe; +}; + +/* HDS configuration structure */ +struct bna_rxf_hds { + enum hds_header_type hdr_type; + int header_size; +}; + +/* RSS configuration structure */ +struct bna_rxf_rss { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +/* RxF structure (hardware Rx Function) */ +struct bna_rxf { + bfa_fsm_t fsm; + int rxf_id; + enum rxf_flags ctrl_flags; + u16 default_vlan_tag; + enum bna_rxf_oper_state rxf_oper_state; + enum bna_status hds_status; + struct bna_rxf_hds hds_cfg; + enum bna_status rss_status; + struct bna_rxf_rss rss_cfg; + struct bna_rit_segment *rit_segment; + struct bna_rx *rx; + u32 forced_offset; + struct bna_mbox_qe mbox_qe; + int mcast_rxq_id; + + /* callback for bna_rxf_start() */ + void (*start_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *start_cbarg; + + /* callback for bna_rxf_stop() */ + void (*stop_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *stop_cbarg; + + /* callback for bna_rxf_receive_enable() / bna_rxf_receive_disable() */ + void (*oper_state_cbfn) (struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *oper_state_cbarg; + + /** + * callback for: + * bna_rxf_ucast_set() + * bna_rxf_{ucast/mcast}_add(), + * bna_rxf_{ucast/mcast}_del(), + * bna_rxf_mode_set() + */ + void (*cam_fltr_cbfn)(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *cam_fltr_cbarg; + + enum bna_rxf_flags rxf_flags; + + /* List of unicast addresses yet to be applied to h/w */ + struct list_head ucast_pending_add_q; + struct list_head ucast_pending_del_q; + int ucast_pending_set; + /* ucast addresses applied to the h/w */ + struct list_head ucast_active_q; + struct bna_mac *ucast_active_mac; + + /* List of multicast addresses yet to be applied to h/w */ + struct list_head mcast_pending_add_q; + struct list_head mcast_pending_del_q; + /* multicast addresses applied to the h/w */ + struct list_head mcast_active_q; + + /* Rx modes yet to be applied to h/w */ + enum bna_rxmode rxmode_pending; + enum bna_rxmode rxmode_pending_bitmask; + /* Rx modes applied to h/w */ + enum bna_rxmode rxmode_active; + + enum bna_status vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; +}; + +/* Rx object */ +struct bna_rx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + + enum bna_rx_type type; + + /* list-head for RX path objects */ + struct list_head rxp_q; + + struct bna_rxf rxf; + + enum bna_rx_flags rx_flags; + + struct bna_mbox_qe mbox_qe; + + struct bfa_wc rxq_stop_wc; + + /* Rx event handlers */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); + + /* callback for 
bna_rx_disable(), bna_rx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_rx *rx, + enum bna_cb_status status); + void *stop_cbarg; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_rx_event_cbfn { + /* Optional */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + /* Mandatory */ + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); +}; + +/* Rx module - keeps track of free, active rx objects */ +struct bna_rx_mod { + struct bna *bna; /* back pointer to parent */ + struct bna_rx *rx; /* BFI_MAX_RXQ entries */ + struct bna_rxp *rxp; /* BFI_MAX_RXQ entries */ + struct bna_rxq *rxq; /* BFI_MAX_RXQ entries */ + + struct list_head rx_free_q; + struct list_head rx_active_q; + int rx_free_count; + + struct list_head rxp_free_q; + int rxp_free_count; + + struct list_head rxq_free_q; + int rxq_free_count; + + enum bna_rx_mod_flags flags; + + /* callback for bna_rx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc rx_stop_wc; + u32 dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX]; + u32 rxf_bmap[2]; +}; + +/** + * + * CAM + * + */ + +struct bna_ucam_mod { + struct bna_mac *ucmac; /* BFI_MAX_UCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +struct bna_mcam_mod { + struct bna_mac *mcmac; /* BFI_MAX_MCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +/** + * + * Statistics + * + */ + +struct bna_tx_stats { + int tx_state; + int tx_flags; + int num_txqs; + u32 txq_bmap[2]; + int txf_id; +}; + +struct bna_rx_stats { + int rx_state; + int rx_flags; + int num_rxps; + int num_rxqs; + u32 rxq_bmap[2]; + u32 cq_bmap[2]; + int rxf_id; + int rxf_state; + int rxf_oper_state; + int num_active_ucast; + int num_active_mcast; + int rxmode_active; + int vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; + int rss_status; + int hds_status; +}; + +struct bna_sw_stats { + int device_state; + int port_state; + int port_flags; + int llport_state; + int priority; + int num_active_tx; + int num_active_rx; + struct bna_tx_stats tx_stats[BFI_MAX_TXQ]; + struct bna_rx_stats rx_stats[BFI_MAX_RXQ]; +}; + +struct bna_stats { + u32 txf_bmap[2]; + u32 rxf_bmap[2]; + struct bfi_ll_stats *hw_stats; + struct bna_sw_stats *sw_stats; +}; + +/** + * + * BNA + * + */ + +struct bna { + struct bfa_pcidev pcidev; + + int port_num; + + struct bna_chip_regs regs; + + struct bna_dma_addr hw_stats_dma; + struct bna_stats stats; + + struct bna_device device; + struct bfa_cee cee; + + struct bna_mbox_mod mbox_mod; + + struct bna_port port; + + struct bna_tx_mod tx_mod; + + struct bna_rx_mod rx_mod; + + struct bna_ib_mod ib_mod; + + struct bna_ucam_mod ucam_mod; + struct bna_mcam_mod mcam_mod; + + struct bna_rit_mod rit_mod; + + int rxf_default_id; + int rxf_promisc_id; + + struct bna_mbox_qe mbox_qe; + + struct bnad *bnad; +}; + +#endif /* __BNA_TYPES_H__ */ diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c new file mode 100644 index 000000000000..491d148f88ae --- /dev/null +++ b/drivers/net/bna/bnad.c @@ -0,0 +1,3270 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ */
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/in.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+#include "bnad.h"
+#include "bna.h"
+#include "cna.h"
+
+DEFINE_MUTEX(bnad_fwimg_mutex);
+
+/*
+ * Module params
+ */
+static uint bnad_msix_disable;
+module_param(bnad_msix_disable, uint, 0444);
+MODULE_PARM_DESC(bnad_msix_disable, "Disable MSIX mode");
+
+static uint bnad_ioc_auto_recover = 1;
+module_param(bnad_ioc_auto_recover, uint, 0444);
+MODULE_PARM_DESC(bnad_ioc_auto_recover, "Enable / Disable auto recovery");
+
+/*
+ * Global variables
+ */
+u32 bnad_rxqs_per_cq = 2;
+
+const u8 bnad_bcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+/*
+ * Local MACROS
+ */
+#define BNAD_TX_UNMAPQ_DEPTH	(bnad->txq_depth * 2)
+
+#define BNAD_RX_UNMAPQ_DEPTH	(bnad->rxq_depth)
+
+#define BNAD_GET_MBOX_IRQ(_bnad)				\
+	(((_bnad)->cfg_flags & BNAD_CF_MSIX) ?			\
+	 ((_bnad)->msix_table[(_bnad)->msix_num - 1].vector) :	\
+	 ((_bnad)->pcidev->irq))
+
+#define BNAD_FILL_UNMAPQ_MEM_REQ(_res_info, _num, _depth)	\
+do {								\
+	(_res_info)->res_type = BNA_RES_T_MEM;			\
+	(_res_info)->res_u.mem_info.mem_type = BNA_MEM_T_KVA;	\
+	(_res_info)->res_u.mem_info.num = (_num);		\
+	(_res_info)->res_u.mem_info.len =			\
+	sizeof(struct bnad_unmap_q) +				\
+	(sizeof(struct bnad_skb_unmap) * ((_depth) - 1));	\
+} while (0)
+
+/*
+ * Reinitialize completions in CQ, once Rx is taken down
+ */
+static void
+bnad_cq_cmpl_init(struct bnad *bnad, struct bna_ccb *ccb)
+{
+	struct bna_cq_entry *cmpl, *next_cmpl;
+	unsigned int wi_range, wis = 0, ccb_prod = 0;
+	int i;
+
+	BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt, cmpl,
+			    wi_range);
+
+	for (i = 0; i < ccb->q_depth; i++) {
+		wis++;
+		if (likely(--wi_range))
+			next_cmpl = cmpl + 1;
+		else {
+			BNA_QE_INDX_ADD(ccb_prod, wis, ccb->q_depth);
+			wis = 0;
+			BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt,
+						next_cmpl, wi_range);
+		}
+		cmpl->valid = 0;
+		cmpl = next_cmpl;
+	}
+}
+
+/*
+ * Frees all pending Tx Bufs
+ * At this point no activity is expected on the Q,
+ * so DMA unmap & freeing is fine.
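+ * Only the first unmap slot of each frame stores the skb pointer;
+ * the following slots hold just the DMA addresses of its fragments.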
+ */ +static void +bnad_free_all_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u16 unmap_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb = NULL; + int i; + + unmap_array = unmap_q->unmap_array; + + unmap_cons = 0; + while (unmap_cons < unmap_q->q_depth) { + skb = unmap_array[unmap_cons].skb; + if (!skb) { + unmap_cons++; + continue; + } + unmap_array[unmap_cons].skb = NULL; + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + unmap_cons++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + unmap_cons++; + } + dev_kfree_skb_any(skb); + } +} + +/* Data Path Handlers */ + +/* + * bnad_free_txbufs : Frees the Tx bufs on Tx completion + * Can be called in a) Interrupt context + * b) Sending context + * c) Tasklet context + */ +static u32 +bnad_free_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u32 sent_packets = 0, sent_bytes = 0; + u16 wis, unmap_cons, updated_hw_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb; + int i; + + /* + * Just return if TX is stopped. This check is useful + * when bnad_free_txbufs() runs out of a tasklet scheduled + * before bnad_cb_tx_cleanup() cleared BNAD_RF_TX_STARTED bit + * but this routine runs actually after the cleanup has been + * executed. + */ + if (!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return 0; + + updated_hw_cons = *(tcb->hw_consumer_index); + + wis = BNA_Q_INDEX_CHANGE(tcb->consumer_index, + updated_hw_cons, tcb->q_depth); + + BUG_ON(!(wis <= BNA_QE_IN_USE_CNT(tcb, tcb->q_depth))); + + unmap_array = unmap_q->unmap_array; + unmap_cons = unmap_q->consumer_index; + + prefetch(&unmap_array[unmap_cons + 1]); + while (wis) { + skb = unmap_array[unmap_cons].skb; + + unmap_array[unmap_cons].skb = NULL; + + sent_packets++; + sent_bytes += skb->len; + wis -= BNA_TXQ_WI_NEEDED(1 + skb_shinfo(skb)->nr_frags); + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + + prefetch(&unmap_array[unmap_cons + 1]); + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + prefetch(&unmap_array[unmap_cons + 1]); + + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + } + dev_kfree_skb_any(skb); + } + + /* Update consumer pointers. */ + tcb->consumer_index = updated_hw_cons; + unmap_q->consumer_index = unmap_cons; + + tcb->txq->tx_packets += sent_packets; + tcb->txq->tx_bytes += sent_bytes; + + return sent_packets; +} + +/* Tx Free Tasklet function */ +/* Frees for all the tcb's in all the Tx's */ +/* + * Scheduled from sending context, so that + * the fat Tx lock is not held for too long + * in the sending context. 
+ */ +static void +bnad_tx_free_tasklet(unsigned long bnad_ptr) +{ + struct bnad *bnad = (struct bnad *)bnad_ptr; + struct bna_tcb *tcb; + u32 acked; + int i, j; + + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + if (!tcb) + continue; + if (((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index) && + (!test_and_set_bit(BNAD_TXQ_FREE_SENT, + &tcb->flags))) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } + } + } +} + +static u32 +bnad_tx(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct net_device *netdev = bnad->netdev; + u32 sent; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return 0; + + sent = bnad_free_txbufs(bnad, tcb); + if (sent) { + if (netif_queue_stopped(netdev) && + netif_carrier_ok(netdev) && + BNA_QE_FREE_CNT(tcb, tcb->q_depth) >= + BNAD_NETIF_WAKE_THRESHOLD) { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + bna_ib_ack(tcb->i_dbell, sent); + } else + bna_ib_ack(tcb->i_dbell, 0); + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + + return sent; +} + +/* MSIX Tx Completion Handler */ +static irqreturn_t +bnad_msix_tx(int irq, void *data) +{ + struct bna_tcb *tcb = (struct bna_tcb *)data; + struct bnad *bnad = tcb->bnad; + + bnad_tx(bnad, tcb); + + return IRQ_HANDLED; +} + +static void +bnad_reset_rcb(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + rcb->producer_index = 0; + rcb->consumer_index = 0; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; +} + +static void +bnad_free_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + + unmap_q = rcb->unmap_q; + while (BNA_QE_IN_USE_CNT(unmap_q, unmap_q->q_depth)) { + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q->consumer_index], + dma_addr), rcb->rxq->buffer_size + + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + } + + bnad_reset_rcb(bnad, rcb); +} + +static void +bnad_alloc_n_post_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + u16 to_alloc, alloced, unmap_prod, wi_range; + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct bna_rxq_entry *rxent; + struct sk_buff *skb; + dma_addr_t dma_addr; + + alloced = 0; + to_alloc = + BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth); + + unmap_array = unmap_q->unmap_array; + unmap_prod = unmap_q->producer_index; + + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, wi_range); + + while (to_alloc--) { + if (!wi_range) { + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, + wi_range); + } + skb = alloc_skb(rcb->rxq->buffer_size + NET_IP_ALIGN, + GFP_ATOMIC); + if (unlikely(!skb)) { + BNAD_UPDATE_CTR(bnad, rxbuf_alloc_failed); + goto finishing; + } + skb->dev = bnad->netdev; + skb_reserve(skb, NET_IP_ALIGN); + unmap_array[unmap_prod].skb = skb; + dma_addr = pci_map_single(bnad->pcidev, skb->data, + rcb->rxq->buffer_size, PCI_DMA_FROMDEVICE); + pci_unmap_addr_set(&unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr); + BNA_QE_INDX_ADD(unmap_prod, 
1, unmap_q->q_depth); + + rxent++; + wi_range--; + alloced++; + } + +finishing: + if (likely(alloced)) { + unmap_q->producer_index = unmap_prod; + rcb->producer_index = unmap_prod; + smp_mb(); + bna_rxq_prod_indx_doorbell(rcb); + } +} + +/* + * Locking is required in the enable path + * because it is called from a napi poll + * context, where the bna_lock is not held + * unlike the IRQ context. + */ +static void +bnad_enable_txrx_irqs(struct bnad *bnad) +{ + struct bna_tcb *tcb; + struct bna_ccb *ccb; + int i, j; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + bna_ib_coalescing_timer_set(tcb->i_dbell, + tcb->txq->ib->ib_config.coalescing_timeo); + bna_ib_ack(tcb->i_dbell, 0); + } + } + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + ccb = bnad->rx_info[i].rx_ctrl[j].ccb; + bnad_enable_rx_irq_unsafe(ccb); + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static inline void +bnad_refill_rxq(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static u32 +bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) +{ + struct bna_cq_entry *cmpl, *next_cmpl; + struct bna_rcb *rcb = NULL; + unsigned int wi_range, packets = 0, wis = 0; + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + u32 flags; + u32 qid0 = ccb->rcb[0]->rxq->rxq_id; + struct bna_pkt_rate *pkt_rt = &ccb->pkt_rate; + + prefetch(bnad->netdev); + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, cmpl, + wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + while (cmpl->valid && packets < budget) { + packets++; + BNA_UPDATE_PKT_CNT(pkt_rt, ntohs(cmpl->length)); + + if (qid0 == cmpl->rxq_id) + rcb = ccb->rcb[0]; + else + rcb = ccb->rcb[1]; + + unmap_q = rcb->unmap_q; + + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q-> + consumer_index], + dma_addr), + rcb->rxq->buffer_size, + PCI_DMA_FROMDEVICE); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + + /* Should be more efficient ? Performance ? 
*/ + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + + wis++; + if (likely(--wi_range)) + next_cmpl = cmpl + 1; + else { + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + wis = 0; + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, + next_cmpl, wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + } + prefetch(next_cmpl); + + flags = ntohl(cmpl->flags); + if (unlikely + (flags & + (BNA_CQ_EF_MAC_ERROR | BNA_CQ_EF_FCS_ERROR | + BNA_CQ_EF_TOO_LONG))) { + dev_kfree_skb_any(skb); + rcb->rxq->rx_packets_with_error++; + goto next; + } + + skb_put(skb, ntohs(cmpl->length)); + if (likely + (bnad->rx_csum && + (((flags & BNA_CQ_EF_IPV4) && + (flags & BNA_CQ_EF_L3_CKSUM_OK)) || + (flags & BNA_CQ_EF_IPV6)) && + (flags & (BNA_CQ_EF_TCP | BNA_CQ_EF_UDP)) && + (flags & BNA_CQ_EF_L4_CKSUM_OK))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + + rcb->rxq->rx_packets++; + rcb->rxq->rx_bytes += skb->len; + skb->protocol = eth_type_trans(skb, bnad->netdev); + + if (bnad->vlan_grp && (flags & BNA_CQ_EF_VLAN)) { + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + vlan_gro_receive(&rx_ctrl->napi, bnad->vlan_grp, + ntohs(cmpl->vlan_tag), skb); + else + vlan_hwaccel_receive_skb(skb, + bnad->vlan_grp, + ntohs(cmpl->vlan_tag)); + + } else { /* Not VLAN tagged/stripped */ + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + napi_gro_receive(&rx_ctrl->napi, skb); + else + netif_receive_skb(skb); + } + +next: + cmpl->valid = 0; + cmpl = next_cmpl; + } + + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + + if (likely(ccb)) { + bna_ib_ack(ccb->i_dbell, packets); + bnad_refill_rxq(bnad, ccb->rcb[0]); + if (ccb->rcb[1]) + bnad_refill_rxq(bnad, ccb->rcb[1]); + } else + bna_ib_ack(ccb->i_dbell, 0); + + return packets; +} + +static void +bnad_disable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + bna_ib_coalescing_timer_set(ccb->i_dbell, 0); + bna_ib_ack(ccb->i_dbell, 0); +} + +static void +bnad_enable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + spin_lock_irq(&bnad->bna_lock); /* Because of polling context */ + bnad_enable_rx_irq_unsafe(ccb); + spin_unlock_irq(&bnad->bna_lock); +} + +static void +bnad_netif_rx_schedule_poll(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_ctrl *rx_ctrl = (struct bnad_rx_ctrl *)(ccb->ctrl); + if (likely(napi_schedule_prep((&rx_ctrl->napi)))) { + bnad_disable_rx_irq(bnad, ccb); + __napi_schedule((&rx_ctrl->napi)); + } + BNAD_UPDATE_CTR(bnad, netif_rx_schedule); +} + +/* MSIX Rx Path Handler */ +static irqreturn_t +bnad_msix_rx(int irq, void *data) +{ + struct bna_ccb *ccb = (struct bna_ccb *)data; + struct bnad *bnad = ccb->bnad; + + bnad_netif_rx_schedule_poll(bnad, ccb); + + return IRQ_HANDLED; +} + +/* Interrupt handlers */ + +/* Mbox Interrupt Handlers */ +static irqreturn_t +bnad_msix_mbox_handler(int irq, void *data) +{ + u32 intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad; + + bnad = netdev_priv(netdev); + + /* BNA_ISR_GET(bnad); Inc Ref count */ + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) + bna_mbox_handler(&bnad->bna, intr_status); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* BNAD_ISR_PUT(bnad); Dec Ref count */ + return IRQ_HANDLED; +} + +static irqreturn_t +bnad_isr(int irq, void *data) +{ + int i, j; + u32 
intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + if (!intr_status) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return IRQ_NONE; + } + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) { + bna_mbox_handler(&bnad->bna, intr_status); + if (!BNA_IS_INTX_DATA_INTR(intr_status)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + goto done; + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Process data interrupts */ + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } +done: + return IRQ_HANDLED; +} + +/* + * Called in interrupt / callback context + * with bna_lock held, so cfg_flags access is OK + */ +static void +bnad_enable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (test_and_clear_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + enable_irq(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_enabled); +} + +/* + * Called with bnad->bna_lock held b'cos of + * bnad->cfg_flags access. + */ +void +bnad_disable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (!test_and_set_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + disable_irq_nosync(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_disabled); +} + +/* Control Path Handlers */ + +/* Callbacks */ +void +bnad_cb_device_enable_mbox_intr(struct bnad *bnad) +{ + bnad_enable_mbox_irq(bnad); +} + +void +bnad_cb_device_disable_mbox_intr(struct bnad *bnad) +{ + bnad_disable_mbox_irq(bnad); +} + +void +bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +void +bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +static void +bnad_cb_port_disabled(void *arg, enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.port_comp); + + netif_carrier_off(bnad->netdev); +} + +void +bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status link_status) +{ + bool link_up = 0; + + link_up = (link_status == BNA_LINK_UP) || (link_status == BNA_CEE_UP); + + if (link_status == BNA_CEE_UP) { + set_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + BNAD_UPDATE_CTR(bnad, cee_up); + } else + clear_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + + if (link_up) { + if (!netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link up\n", + bnad->netdev->name); + netif_carrier_on(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + if (test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) { + /* Force an immediate Transmit Schedule */ + pr_info("bna: %s TX_STARTED\n", + bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } else { + netif_stop_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + } + } else { + if (netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link down\n", + bnad->netdev->name); + 
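+			/* Mirror link-down to the stack */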
netif_carrier_off(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + } + } +} + +static void +bnad_cb_tx_disabled(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.tx_comp); +} + +static void +bnad_cb_tcb_setup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + tx_info->tcb[tcb->id] = tcb; + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_TX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_tcb_destroy(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + tx_info->tcb[tcb->id] = NULL; +} + +static void +bnad_cb_rcb_setup(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_RX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_ccb_setup(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = ccb; + ccb->ctrl = &rx_info->rx_ctrl[ccb->id]; +} + +static void +bnad_cb_ccb_destroy(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = NULL; +} + +static void +bnad_cb_tx_stall(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + if (tx_info != &bnad->tx_info[0]) + return; + + clear_bit(BNAD_RF_TX_STARTED, &bnad->run_flags); + netif_stop_queue(bnad->netdev); + pr_info("bna: %s TX_STOPPED\n", bnad->netdev->name); +} + +static void +bnad_cb_tx_resume(struct bnad *bnad, struct bna_tcb *tcb) +{ + if (test_and_set_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return; + + if (netif_carrier_ok(bnad->netdev)) { + pr_info("bna: %s TX_STARTED\n", bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } +} + +static void +bnad_cb_tx_cleanup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + if (!tcb || (!tcb->unmap_q)) + return; + + if (!unmap_q->unmap_array) + return; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return; + + bnad_free_all_txbufs(bnad, tcb); + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); +} + +static void +bnad_cb_rx_cleanup(struct bnad *bnad, + struct bna_ccb *ccb) +{ + bnad_cq_cmpl_init(bnad, ccb); + + bnad_free_rxbufs(bnad, ccb->rcb[0]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[0]->flags); + + if (ccb->rcb[1]) { + bnad_free_rxbufs(bnad, ccb->rcb[1]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[1]->flags); + } +} + +static void +bnad_cb_rx_post(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + set_bit(BNAD_RXQ_STARTED, &rcb->flags); + + /* Now allocate & post buffers for this RCB */ + /* !!Allocation in callback context */ + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static void +bnad_cb_rx_disabled(void *arg, struct bna_rx *rx, + enum 
bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.rx_comp); +} + +static void +bnad_cb_rx_mcast_add(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status) +{ + bnad->bnad_completions.mcast_comp_status = status; + complete(&bnad->bnad_completions.mcast_comp); +} + +void +bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats) +{ + if (status == BNA_CB_SUCCESS) + BNAD_UPDATE_CTR(bnad, hw_stats_updates); + + if (!netif_running(bnad->netdev) || + !test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) + return; + + mod_timer(&bnad->stats_timer, + jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); +} + +void +bnad_cb_stats_clr(struct bnad *bnad) +{ +} + +/* Resource allocation, free functions */ + +static void +bnad_mem_free(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if (mem_info->mdl == NULL) + return; + + for (i = 0; i < mem_info->num; i++) { + if (mem_info->mdl[i].kva != NULL) { + if (mem_info->mem_type == BNA_MEM_T_DMA) { + BNA_GET_DMA_ADDR(&(mem_info->mdl[i].dma), + dma_pa); + pci_free_consistent(bnad->pcidev, + mem_info->mdl[i].len, + mem_info->mdl[i].kva, dma_pa); + } else + kfree(mem_info->mdl[i].kva); + } + } + kfree(mem_info->mdl); + mem_info->mdl = NULL; +} + +static int +bnad_mem_alloc(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if ((mem_info->num == 0) || (mem_info->len == 0)) { + mem_info->mdl = NULL; + return 0; + } + + mem_info->mdl = kcalloc(mem_info->num, sizeof(struct bna_mem_descr), + GFP_KERNEL); + if (mem_info->mdl == NULL) + return -ENOMEM; + + if (mem_info->mem_type == BNA_MEM_T_DMA) { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = + pci_alloc_consistent(bnad->pcidev, + mem_info->len, &dma_pa); + + if (mem_info->mdl[i].kva == NULL) + goto err_return; + + BNA_SET_DMA_ADDR(dma_pa, + &(mem_info->mdl[i].dma)); + } + } else { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = kzalloc(mem_info->len, + GFP_KERNEL); + if (mem_info->mdl[i].kva == NULL) + goto err_return; + } + } + + return 0; + +err_return: + bnad_mem_free(bnad, mem_info); + return -ENOMEM; +} + +/* Free IRQ for Mailbox */ +static void +bnad_mbox_irq_free(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int irq; + unsigned long flags; + + if (intr_info->idl == NULL) + return; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bnad_disable_mbox_irq(bnad); + + irq = BNAD_GET_MBOX_IRQ(bnad); + free_irq(irq, bnad->netdev); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + kfree(intr_info->idl); +} + +/* + * Allocates IRQ for Mailbox, but keep it disabled + * This will be enabled once we get the mbox enable callback + * from bna + */ +static int +bnad_mbox_irq_alloc(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int err; + unsigned long flags; + u32 irq; + irq_handler_t irq_handler; + + /* Mbox should use only 1 vector */ + + intr_info->idl = kzalloc(sizeof(*(intr_info->idl)), GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) { + irq_handler = (irq_handler_t)bnad_msix_mbox_handler; + irq = bnad->msix_table[bnad->msix_num - 1].vector; + flags = 0; + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl[0].vector = bnad->msix_num - 1; + } else { + irq_handler = (irq_handler_t)bnad_isr; + irq = 
bnad->pcidev->irq; + flags = IRQF_SHARED; + intr_info->intr_type = BNA_INTR_T_INTX; + /* intr_info->idl.vector = 0 ? */ + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + sprintf(bnad->mbox_irq_name, "%s", BNAD_NAME); + + err = request_irq(irq, irq_handler, flags, + bnad->mbox_irq_name, bnad->netdev); + if (err) { + kfree(intr_info->idl); + intr_info->idl = NULL; + return err; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_disable_mbox_irq(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return 0; +} + +static void +bnad_txrx_irq_free(struct bnad *bnad, struct bna_intr_info *intr_info) +{ + kfree(intr_info->idl); + intr_info->idl = NULL; +} + +/* Allocates Interrupt Descriptor List for MSIX/INT-X vectors */ +static int +bnad_txrx_irq_alloc(struct bnad *bnad, enum bnad_intr_source src, + uint txrx_id, struct bna_intr_info *intr_info) +{ + int i, vector_start = 0; + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + vector_start = txrx_id; + break; + + case BNAD_INTR_RX: + vector_start = bnad->num_tx * bnad->num_txq_per_tx + + txrx_id; + break; + + default: + BUG(); + } + + for (i = 0; i < intr_info->num; i++) + intr_info->idl[i].vector = vector_start + i; + } else { + intr_info->intr_type = BNA_INTR_T_INTX; + intr_info->num = 1; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + intr_info->idl[0].vector = 0x1; /* Bit mask : Tx IB */ + break; + + case BNAD_INTR_RX: + intr_info->idl[0].vector = 0x2; /* Bit mask : Rx IB */ + break; + } + } + return 0; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Tx MSIX vector(s) from the kernel + */ +static void +bnad_tx_msix_unregister(struct bnad *bnad, struct bnad_tx_info *tx_info, + int num_txqs) +{ + int i; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + if (tx_info->tcb[i] == NULL) + continue; + + vector_num = tx_info->tcb[i]->intr_vector; + free_irq(bnad->msix_table[vector_num].vector, tx_info->tcb[i]); + } +} + +/** + * NOTE: Should be called for MSIX only + * Registers Tx MSIX vector(s) and ISR(s), cookie with the kernel + */ +static int +bnad_tx_msix_register(struct bnad *bnad, struct bnad_tx_info *tx_info, + uint tx_id, int num_txqs) +{ + int i; + int err; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + vector_num = tx_info->tcb[i]->intr_vector; + sprintf(tx_info->tcb[i]->name, "%s TXQ %d", bnad->netdev->name, + tx_id + tx_info->tcb[i]->id); + err = request_irq(bnad->msix_table[vector_num].vector, + (irq_handler_t)bnad_msix_tx, 0, + tx_info->tcb[i]->name, + tx_info->tcb[i]); + if (err) + goto err_return; + } + + return 0; + +err_return: + if (i > 0) + bnad_tx_msix_unregister(bnad, tx_info, (i - 1)); + return -1; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Rx MSIX vector(s) from the kernel + */ +static void +bnad_rx_msix_unregister(struct bnad *bnad, struct bnad_rx_info *rx_info, + int num_rxps) +{ + int i; + int vector_num; + + for (i = 0; i < num_rxps; i++) { + if (rx_info->rx_ctrl[i].ccb == NULL) + continue; + + vector_num = rx_info->rx_ctrl[i].ccb->intr_vector; + 
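+		/* intr_vector indexes the msix_table entry registered at setup */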
free_irq(bnad->msix_table[vector_num].vector,
+			rx_info->rx_ctrl[i].ccb);
+	}
+}
+
+/**
+ * NOTE: Should be called for MSIX only
+ * Registers Rx MSIX vector(s) and ISR(s), cookie with the kernel
+ */
+static int
+bnad_rx_msix_register(struct bnad *bnad, struct bnad_rx_info *rx_info,
+			uint rx_id, int num_rxps)
+{
+	int i;
+	int err;
+	int vector_num;
+
+	for (i = 0; i < num_rxps; i++) {
+		vector_num = rx_info->rx_ctrl[i].ccb->intr_vector;
+		sprintf(rx_info->rx_ctrl[i].ccb->name, "%s CQ %d",
+			bnad->netdev->name,
+			rx_id + rx_info->rx_ctrl[i].ccb->id);
+		err = request_irq(bnad->msix_table[vector_num].vector,
+				  (irq_handler_t)bnad_msix_rx, 0,
+				  rx_info->rx_ctrl[i].ccb->name,
+				  rx_info->rx_ctrl[i].ccb);
+		if (err)
+			goto err_return;
+	}
+
+	return 0;
+
+err_return:
+	if (i > 0)
+		bnad_rx_msix_unregister(bnad, rx_info, (i - 1));
+	return -1;
+}
+
+/* Free Tx object Resources */
+static void
+bnad_tx_res_free(struct bnad *bnad, struct bna_res_info *res_info)
+{
+	int i;
+
+	for (i = 0; i < BNA_TX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			bnad_mem_free(bnad, &res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info);
+	}
+}
+
+/* Allocates memory and interrupt resources for Tx object */
+static int
+bnad_tx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info,
+		  uint tx_id)
+{
+	int i, err = 0;
+
+	for (i = 0; i < BNA_TX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			err = bnad_mem_alloc(bnad,
+					&res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_TX, tx_id,
+					&res_info[i].res_u.intr_info);
+		if (err)
+			goto err_return;
+	}
+	return 0;
+
+err_return:
+	bnad_tx_res_free(bnad, res_info);
+	return err;
+}
+
+/* Free Rx object Resources */
+static void
+bnad_rx_res_free(struct bnad *bnad, struct bna_res_info *res_info)
+{
+	int i;
+
+	for (i = 0; i < BNA_RX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			bnad_mem_free(bnad, &res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info);
+	}
+}
+
+/* Allocates memory and interrupt resources for Rx object */
+static int
+bnad_rx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info,
+		  uint rx_id)
+{
+	int i, err = 0;
+
+	/* All memory needs to be allocated before setup_ccbs */
+	for (i = 0; i < BNA_RX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			err = bnad_mem_alloc(bnad,
+					&res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_RX, rx_id,
+					&res_info[i].res_u.intr_info);
+		if (err)
+			goto err_return;
+	}
+	return 0;
+
+err_return:
+	bnad_rx_res_free(bnad, res_info);
+	return err;
+}
+
+/* Timer callbacks */
+/* a) IOC timer */
+static void
+bnad_ioc_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_timeout((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+static void
+bnad_ioc_hb_check(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_hb_check((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+static void
+bnad_ioc_sem_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
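+	/* Forwards to the bfa handler under bna_lock, like the timers above */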
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_sem_timeout((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/*
+ * All timer routines use bnad->bna_lock to protect against
+ * the following race, which may occur in case of no locking:
+ *	Time	CPU m		CPU n
+ *	0	1 = test_bit
+ *	1			clear_bit
+ *	2			del_timer_sync
+ *	3	mod_timer
+ */
+
+/* b) Dynamic Interrupt Moderation Timer */
+static void
+bnad_dim_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	struct bnad_rx_info *rx_info;
+	struct bnad_rx_ctrl *rx_ctrl;
+	int i, j;
+	unsigned long flags;
+
+	if (!netif_carrier_ok(bnad->netdev))
+		return;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	for (i = 0; i < bnad->num_rx; i++) {
+		rx_info = &bnad->rx_info[i];
+		if (!rx_info->rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++) {
+			rx_ctrl = &rx_info->rx_ctrl[j];
+			if (!rx_ctrl->ccb)
+				continue;
+			bna_rx_dim_update(rx_ctrl->ccb);
+		}
+	}
+
+	/* Check for BNAD_CF_DIM_ENABLED, does not eliminate a race */
+	if (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags))
+		mod_timer(&bnad->dim_timer,
+			jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/* c) Statistics Timer */
+static void
+bnad_stats_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	if (!netif_running(bnad->netdev) ||
+		!test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags))
+		return;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bna_stats_get(&bnad->bna);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/*
+ * Set up timer for DIM
+ * Called with bnad->bna_lock held
+ */
+void
+bnad_dim_timer_start(struct bnad *bnad)
+{
+	if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED &&
+		!test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) {
+		setup_timer(&bnad->dim_timer, bnad_dim_timeout,
+			(unsigned long)bnad);
+		set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags);
+		mod_timer(&bnad->dim_timer,
+			jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
+	}
+}
+
+/*
+ * Set up timer for statistics
+ * Called with mutex_lock(&bnad->conf_mutex) held
+ */
+static void
+bnad_stats_timer_start(struct bnad *bnad)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) {
+		setup_timer(&bnad->stats_timer, bnad_stats_timeout,
+			(unsigned long)bnad);
+		mod_timer(&bnad->stats_timer,
+			jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ));
+	}
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+
+}
+
+/*
+ * Stops the stats timer
+ * Called with mutex_lock(&bnad->conf_mutex) held
+ */
+static void
+bnad_stats_timer_stop(struct bnad *bnad)
+{
+	int to_del = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	if (test_and_clear_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags))
+		to_del = 1;
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+	if (to_del)
+		del_timer_sync(&bnad->stats_timer);
+}
+
+/* Utilities */
+
+static void
+bnad_netdev_mc_list_get(struct net_device *netdev, u8 *mc_list)
+{
+	int i = 1; /* Index 0 has broadcast address */
+	struct netdev_hw_addr *mc_addr;
+
+	netdev_for_each_mc_addr(mc_addr, netdev) {
+		memcpy(&mc_list[i * ETH_ALEN], &mc_addr->addr[0],
+			ETH_ALEN);
+		i++;
+	}
+}
+
+static int
+bnad_napi_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct bnad_rx_ctrl *rx_ctrl =
+		container_of(napi, struct bnad_rx_ctrl, napi);
+	struct bna_ccb *ccb;
+	struct bnad *bnad;
*bnad; + int rcvd = 0; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; + +poll_exit: + napi_complete(napi); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_rx_irq(bnad, ccb); + return rcvd; +} + +static int +bnad_napi_poll_txrx(struct napi_struct *napi, int budget) +{ + struct bnad_rx_ctrl *rx_ctrl = + container_of(napi, struct bnad_rx_ctrl, napi); + struct bna_ccb *ccb; + struct bnad *bnad; + int rcvd = 0; + int i, j; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + /* Handle Tx Completions, if any */ + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) + bnad_tx(bnad, bnad->tx_info[i].tcb[j]); + } + + /* Handle Rx Completions */ + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; +poll_exit: + napi_complete(napi); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_txrx_irqs(bnad); + return rcvd; +} + +static void +bnad_napi_enable(struct bnad *bnad, u32 rx_id) +{ + int (*napi_poll) (struct napi_struct *, int); + struct bnad_rx_ctrl *rx_ctrl; + int i; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + napi_poll = bnad_napi_poll_rx; + else + napi_poll = bnad_napi_poll_txrx; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Initialize & enable NAPI */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + rx_ctrl = &bnad->rx_info[rx_id].rx_ctrl[i]; + netif_napi_add(bnad->netdev, &rx_ctrl->napi, + napi_poll, 64); + napi_enable(&rx_ctrl->napi); + } +} + +static void +bnad_napi_disable(struct bnad *bnad, u32 rx_id) +{ + int i; + + /* First disable and then clean up */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + napi_disable(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + netif_napi_del(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + } +} + +/* Should be called with conf_lock held */ +void +bnad_cleanup_tx(struct bnad *bnad, uint tx_id) +{ + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + unsigned long flags; + + if (!tx_info->tx) + return; + + init_completion(&bnad->bnad_completions.tx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_disable(tx_info->tx, BNA_HARD_CLEANUP, bnad_cb_tx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.tx_comp); + + if (tx_info->tcb[0]->intr_type == BNA_INTR_T_MSIX) + bnad_tx_msix_unregister(bnad, tx_info, + bnad->num_txq_per_tx); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_destroy(tx_info->tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + tx_info->tx = NULL; + + if (0 == tx_id) + tasklet_kill(&bnad->tx_free_tasklet); + + bnad_tx_res_free(bnad, res_info); +} + +/* Should be called with conf_lock held */ +int +bnad_setup_tx(struct bnad *bnad, uint tx_id) +{ + int err; + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + struct bna_tx_config *tx_config = &bnad->tx_config[tx_id]; + struct bna_tx_event_cbfn tx_cbfn; + struct bna_tx *tx; + unsigned long flags; + + /* Initialize the Tx object configuration */ + tx_config->num_txq = bnad->num_txq_per_tx; + tx_config->txq_depth = bnad->txq_depth; +
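/* Queue count and depth come from the defaults set in bnad_q_num_init() and bnad_init() (BNAD_TXQ_NUM, BNAD_TXQ_DEPTH) */ +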
tx_config->tx_type = BNA_TX_T_REGULAR; + + /* Initialize the tx event handlers */ + tx_cbfn.tcb_setup_cbfn = bnad_cb_tcb_setup; + tx_cbfn.tcb_destroy_cbfn = bnad_cb_tcb_destroy; + tx_cbfn.tx_stall_cbfn = bnad_cb_tx_stall; + tx_cbfn.tx_resume_cbfn = bnad_cb_tx_resume; + tx_cbfn.tx_cleanup_cbfn = bnad_cb_tx_cleanup; + + /* Get BNA's resource requirement for one tx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_res_req(bnad->num_txq_per_tx, + bnad->txq_depth, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_TX_RES_MEM_T_UNMAPQ], + bnad->num_txq_per_tx, + BNAD_TX_UNMAPQ_DEPTH); + + /* Allocate resources */ + err = bnad_tx_res_alloc(bnad, res_info, tx_id); + if (err) + return err; + + /* Ask BNA to create one Tx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + tx = bna_tx_create(&bnad->bna, bnad, tx_config, &tx_cbfn, res_info, + tx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!tx) + goto err_return; + tx_info->tx = tx; + + /* Register ISR for the Tx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_tx_msix_register(bnad, tx_info, + tx_id, bnad->num_txq_per_tx); + if (err) + goto err_return; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_enable(tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_tx_res_free(bnad, res_info); + return err; +} + +/* Setup the rx config for bna_rx_create */ +/* bnad decides the configuration */ +static void +bnad_init_rx_config(struct bnad *bnad, struct bna_rx_config *rx_config) +{ + rx_config->rx_type = BNA_RX_T_REGULAR; + rx_config->num_paths = bnad->num_rxp_per_rx; + + if (bnad->num_rxp_per_rx > 1) { + rx_config->rss_status = BNA_STATUS_T_ENABLED; + rx_config->rss_config.hash_type = + (BFI_RSS_T_V4_TCP | + BFI_RSS_T_V6_TCP | + BFI_RSS_T_V4_IP | + BFI_RSS_T_V6_IP); + rx_config->rss_config.hash_mask = + bnad->num_rxp_per_rx - 1; + get_random_bytes(rx_config->rss_config.toeplitz_hash_key, + sizeof(rx_config->rss_config.toeplitz_hash_key)); + } else { + rx_config->rss_status = BNA_STATUS_T_DISABLED; + memset(&rx_config->rss_config, 0, + sizeof(rx_config->rss_config)); + } + rx_config->rxp_type = BNA_RXP_SLR; + rx_config->q_depth = bnad->rxq_depth; + + rx_config->small_buff_size = BFI_SMALL_RXBUF_SIZE; + + rx_config->vlan_strip_status = BNA_STATUS_T_ENABLED; +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +void +bnad_cleanup_rx(struct bnad *bnad, uint rx_id) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + unsigned long flags; + int dim_timer_del = 0; + + if (!rx_info->rx) + return; + + if (0 == rx_id) { + spin_lock_irqsave(&bnad->bna_lock, flags); + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (dim_timer_del) + del_timer_sync(&bnad->dim_timer); + } + + bnad_napi_disable(bnad, rx_id); + + init_completion(&bnad->bnad_completions.rx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_disable(rx_info->rx, BNA_HARD_CLEANUP, bnad_cb_rx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.rx_comp); + + if (rx_info->rx_ctrl[0].ccb->intr_type == BNA_INTR_T_MSIX) + 
bnad_rx_msix_unregister(bnad, rx_info, rx_config->num_paths); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_destroy(rx_info->rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + rx_info->rx = NULL; + + bnad_rx_res_free(bnad, res_info); +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +int +bnad_setup_rx(struct bnad *bnad, uint rx_id) +{ + int err; + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_rx_event_cbfn rx_cbfn; + struct bna_rx *rx; + unsigned long flags; + + /* Initialize the Rx object configuration */ + bnad_init_rx_config(bnad, rx_config); + + /* Initialize the Rx event handlers */ + rx_cbfn.rcb_setup_cbfn = bnad_cb_rcb_setup; + rx_cbfn.rcb_destroy_cbfn = NULL; + rx_cbfn.ccb_setup_cbfn = bnad_cb_ccb_setup; + rx_cbfn.ccb_destroy_cbfn = bnad_cb_ccb_destroy; + rx_cbfn.rx_cleanup_cbfn = bnad_cb_rx_cleanup; + rx_cbfn.rx_post_cbfn = bnad_cb_rx_post; + + /* Get BNA's resource requirement for one Rx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_res_req(rx_config, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_RX_RES_MEM_T_UNMAPQ], + rx_config->num_paths + + ((rx_config->rxp_type == BNA_RXP_SINGLE) ? 0 : + rx_config->num_paths), BNAD_RX_UNMAPQ_DEPTH); + + /* Allocate resource */ + err = bnad_rx_res_alloc(bnad, res_info, rx_id); + if (err) + return err; + + /* Ask BNA to create one Rx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + rx = bna_rx_create(&bnad->bna, bnad, rx_config, &rx_cbfn, res_info, + rx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!rx) + goto err_return; + rx_info->rx = rx; + + /* Register ISR for the Rx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_rx_msix_register(bnad, rx_info, rx_id, + rx_config->num_paths); + if (err) + goto err_return; + } + + /* Enable NAPI */ + bnad_napi_enable(bnad, rx_id); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (0 == rx_id) { + /* Set up Dynamic Interrupt Moderation Vector */ + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) + bna_rx_dim_reconfig(&bnad->bna, bna_napi_dim_vector); + + /* Enable VLAN filtering only on the default Rx */ + bna_rx_vlanfilter_enable(rx); + + /* Start the DIM timer */ + bnad_dim_timer_start(bnad); + } + + bna_rx_enable(rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_cleanup_rx(bnad, rx_id); + return err; +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_tx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_tx_info *tx_info; + + tx_info = &bnad->tx_info[0]; + if (!tx_info->tx) + return; + + bna_tx_coalescing_timeo_set(tx_info->tx, bnad->tx_coalescing_timeo); +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_rx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info; + int i; + + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + bna_rx_coalescing_timeo_set(rx_info->rx, + bnad->rx_coalescing_timeo); + } +} + +/* + * Called with bnad->bna_lock held + */ +static int +bnad_mac_addr_set_locked(struct bnad *bnad, u8 *mac_addr) +{ + int ret; + + if (!is_valid_ether_addr(mac_addr)) + return -EADDRNOTAVAIL; + + /* 
If datapath is down, pretend everything went through */ + if (!bnad->rx_info[0].rx) + return 0; + + ret = bna_rx_ucast_set(bnad->rx_info[0].rx, mac_addr, NULL); + if (ret != BNA_CB_SUCCESS) + return -EADDRNOTAVAIL; + + return 0; +} + +/* Should be called with conf_lock held */ +static int +bnad_enable_default_bcast(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[0]; + int ret; + unsigned long flags; + + init_completion(&bnad->bnad_completions.mcast_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + ret = bna_rx_mcast_add(rx_info->rx, (u8 *)bnad_bcast_addr, + bnad_cb_rx_mcast_add); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (ret == BNA_CB_SUCCESS) + wait_for_completion(&bnad->bnad_completions.mcast_comp); + else + return -ENODEV; + + if (bnad->bnad_completions.mcast_comp_status != BNA_CB_SUCCESS) + return -ENODEV; + + return 0; +} + +/* Statistics utilities */ +void +bnad_netdev_qstats_fill(struct bnad *bnad) +{ + struct net_device_stats *net_stats = &bnad->net_stats; + int i, j; + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + net_stats->rx_packets += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_packets; + net_stats->rx_bytes += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_bytes; + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + net_stats->rx_packets += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_packets; + net_stats->rx_bytes += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_bytes; + } + } + } + } + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + if (bnad->tx_info[i].tcb[j]) { + net_stats->tx_packets += + bnad->tx_info[i].tcb[j]->txq->tx_packets; + net_stats->tx_bytes += + bnad->tx_info[i].tcb[j]->txq->tx_bytes; + } + } + } +} + +/* + * Must be called with the bna_lock held. + */ +void +bnad_netdev_hwstats_fill(struct bnad *bnad) +{ + struct bfi_ll_stats_mac *mac_stats; + struct net_device_stats *net_stats = &bnad->net_stats; + u64 bmap; + int i; + + mac_stats = &bnad->stats.bna_stats->hw_stats->mac_stats; + net_stats->rx_errors = + mac_stats->rx_fcs_error + mac_stats->rx_alignment_error + + mac_stats->rx_frame_length_error + mac_stats->rx_code_error + + mac_stats->rx_undersize; + net_stats->tx_errors = mac_stats->tx_fcs_error + + mac_stats->tx_undersize; + net_stats->rx_dropped = mac_stats->rx_drop; + net_stats->tx_dropped = mac_stats->tx_drop; + net_stats->multicast = mac_stats->rx_multicast; + net_stats->collisions = mac_stats->tx_total_collision; + + net_stats->rx_length_errors = mac_stats->rx_frame_length_error; + + /* receive ring buffer overflow ?? 
*/ + + net_stats->rx_crc_errors = mac_stats->rx_fcs_error; + net_stats->rx_frame_errors = mac_stats->rx_alignment_error; + /* recv'r fifo overrun */ + bmap = (u64)bnad->stats.bna_stats->rxf_bmap[0] | + ((u64)bnad->stats.bna_stats->rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + net_stats->rx_fifo_errors = + bnad->stats.bna_stats-> + hw_stats->rxf_stats[i].frame_drops; + break; + } + bmap >>= 1; + } +} + +static void +bnad_mbox_irq_sync(struct bnad *bnad) +{ + u32 irq; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + irq = bnad->msix_table[bnad->msix_num - 1].vector; + else + irq = bnad->pcidev->irq; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + synchronize_irq(irq); +} + +/* Utility used by bnad_start_xmit, for doing TSO */ +static int +bnad_tso_prepare(struct bnad *bnad, struct sk_buff *skb) +{ + int err; + + /* SKB_GSO_TCPV4 and SKB_GSO_TCPV6 are defined since 2.6.18. */ + BUG_ON(!(skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)); + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) { + BNAD_UPDATE_CTR(bnad, tso_err); + return err; + } + } + + /* + * For TSO, the TCP checksum field is seeded with pseudo-header sum + * excluding the length field. + */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + /* Do we really need these? */ + iph->tot_len = 0; + iph->check = 0; + + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso4); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + BUG_ON(!(skb->protocol == htons(ETH_P_IPV6))); + ipv6h->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso6); + } + + return 0; +} + +/* + * Initialize Q numbers depending on Rx Paths + * Called with bnad->bna_lock held, because of cfg_flags + * access.
+ */ +static void +bnad_q_num_init(struct bnad *bnad) +{ + int rxps; + + rxps = min((uint)num_online_cpus(), + (uint)(BNAD_MAX_RXS * BNAD_MAX_RXPS_PER_RX)); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + rxps = 1; /* INTx */ + + bnad->num_rx = 1; + bnad->num_tx = 1; + bnad->num_rxp_per_rx = rxps; + bnad->num_txq_per_tx = BNAD_TXQ_NUM; +} + +/* + * Adjusts the Q numbers, given a number of MSI-X vectors. + * Gives preference to RSS over Tx priority queues; in that case, + * just use 1 Tx Q. + * Called with bnad->bna_lock held because of cfg_flags access. + */ +static void +bnad_q_num_adjust(struct bnad *bnad, int msix_vectors) +{ + bnad->num_txq_per_tx = 1; + if ((msix_vectors >= (bnad->num_tx * bnad->num_txq_per_tx) + + bnad_rxqs_per_cq + BNAD_MAILBOX_MSIX_VECTORS) && + (bnad->cfg_flags & BNAD_CF_MSIX)) { + bnad->num_rxp_per_rx = msix_vectors - + (bnad->num_tx * bnad->num_txq_per_tx) - + BNAD_MAILBOX_MSIX_VECTORS; + } else + bnad->num_rxp_per_rx = 1; +} + +static void +bnad_set_netdev_perm_addr(struct bnad *bnad) +{ + struct net_device *netdev = bnad->netdev; + + memcpy(netdev->perm_addr, &bnad->perm_addr, netdev->addr_len); + if (is_zero_ether_addr(netdev->dev_addr)) + memcpy(netdev->dev_addr, &bnad->perm_addr, netdev->addr_len); +} + +/* Enable / disable device */ +static void +bnad_device_disable(struct bnad *bnad) +{ + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_disable(&bnad->bna.device, BNA_HARD_CLEANUP); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); +} + +static int +bnad_device_enable(struct bnad *bnad) +{ + int err = 0; + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_enable(&bnad->bna.device); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); + + if (bnad->bnad_completions.ioc_comp_status) + err = bnad->bnad_completions.ioc_comp_status; + + return err; +} + +/* Free BNA resources */ +static void +bnad_res_free(struct bnad *bnad) +{ + int i; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + bnad_mem_free(bnad, &res_info[i].res_u.mem_info); + else + bnad_mbox_irq_free(bnad, &res_info[i].res_u.intr_info); + } +} + +/* Allocates memory and interrupt resources for BNA */ +static int +bnad_res_alloc(struct bnad *bnad) +{ + int i, err; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + err = bnad_mem_alloc(bnad, &res_info[i].res_u.mem_info); + else + err = bnad_mbox_irq_alloc(bnad, + &res_info[i].res_u.intr_info); + if (err) + goto err_return; + } + return 0; + +err_return: + bnad_res_free(bnad); + return err; +} + +/* Interrupt enable / disable */ +static void +bnad_enable_msix(struct bnad *bnad) +{ + int i, ret; + u32 tot_msix_num; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return; + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (bnad->msix_table) + return; + + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + bnad->msix_table = + kcalloc(tot_msix_num, sizeof(struct msix_entry), GFP_KERNEL); + + if (!bnad->msix_table) + goto intx_mode; + + for (i = 0; i <
tot_msix_num; i++) + bnad->msix_table[i].entry = i; + + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, tot_msix_num); + if (ret > 0) { + /* Not enough MSI-X vectors. */ + + spin_lock_irqsave(&bnad->bna_lock, flags); + /* ret = #of vectors that we got */ + bnad_q_num_adjust(bnad, ret); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx + * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + /* Try once more with adjusted numbers */ + /* If this fails, fall back to INTx */ + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, + tot_msix_num); + if (ret) + goto intx_mode; + + } else if (ret < 0) + goto intx_mode; + return; + +intx_mode: + + kfree(bnad->msix_table); + bnad->msix_table = NULL; + bnad->msix_num = 0; + bnad->msix_diag_num = 0; + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad->cfg_flags &= ~BNAD_CF_MSIX; + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static void +bnad_disable_msix(struct bnad *bnad) +{ + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + if (bnad->cfg_flags & BNAD_CF_MSIX) + bnad->cfg_flags &= ~BNAD_CF_MSIX; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + pci_disable_msix(bnad->pcidev); + kfree(bnad->msix_table); + bnad->msix_table = NULL; + } +} + +/* Netdev entry points */ +static int +bnad_open(struct net_device *netdev) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + int mtu; + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Tx */ + err = bnad_setup_tx(bnad, 0); + if (err) + goto err_return; + + /* Rx */ + err = bnad_setup_rx(bnad, 0); + if (err) + goto cleanup_tx; + + /* Port */ + pause_config.tx_pause = 0; + pause_config.rx_pause = 0; + + mtu = ETH_HLEN + bnad->netdev->mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + bna_port_enable(&bnad->bna.port); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Enable broadcast */ + bnad_enable_default_bcast(bnad); + + /* Set the UCAST address */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_mac_addr_set_locked(bnad, netdev->dev_addr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Start the stats timer */ + bnad_stats_timer_start(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; + +cleanup_tx: + bnad_cleanup_tx(bnad, 0); + +err_return: + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static int +bnad_stop(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Stop the stats timer */ + bnad_stats_timer_stop(bnad); + + init_completion(&bnad->bnad_completions.port_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_disable(&bnad->bna.port, BNA_HARD_CLEANUP, + bnad_cb_port_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.port_comp); + + bnad_cleanup_tx(bnad, 0); + bnad_cleanup_rx(bnad, 0); + + /* Synchronize mailbox IRQ */ + bnad_mbox_irq_sync(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; +} + +/* TX */ +/* + * bnad_start_xmit : Netdev entry point for Transmit + * Called under lock held by net_device + */ +static netdev_tx_t 
+bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + + u16 txq_prod, vlan_tag = 0; + u32 unmap_prod, wis, wis_used, wi_range; + u32 vectors, vect_id, i, acked; + u32 tx_id; + int err; + + struct bnad_tx_info *tx_info; + struct bna_tcb *tcb; + struct bnad_unmap_q *unmap_q; + dma_addr_t dma_addr; + struct bna_txq_entry *txqent; + bna_txq_wi_ctrl_flag_t flags; + + if (unlikely + (skb->len <= ETH_HLEN || skb->len > BFI_TX_MAX_DATA_PER_PKT)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* + * Takes care of the Tx that is scheduled between clearing the flag + * and the netif_stop_queue() call. + */ + if (unlikely(!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags))) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + tx_id = 0; + + tx_info = &bnad->tx_info[tx_id]; + tcb = tx_info->tcb[tx_id]; + unmap_q = tcb->unmap_q; + + vectors = 1 + skb_shinfo(skb)->nr_frags; + if (vectors > BFI_TX_MAX_VECTORS_PER_PKT) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + wis = BNA_TXQ_WI_NEEDED(vectors); /* 4 vectors per work item */ + acked = 0; + if (unlikely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + if ((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index && + !test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } else { + netif_stop_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + + smp_mb(); + /* + * Check again to deal with race condition between + * netif_stop_queue here, and netif_wake_queue in + * interrupt handler which is not inside netif tx lock. + */ + if (likely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + return NETDEV_TX_BUSY; + } else { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + } + + unmap_prod = unmap_q->producer_index; + wis_used = 1; + vect_id = 0; + flags = 0; + + txq_prod = tcb->producer_index; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + txqent->hdr.wi.reserved = 0; + txqent->hdr.wi.num_vectors = vectors; + txqent->hdr.wi.opcode = + htons((skb_is_gso(skb) ? BNA_TXQ_WI_SEND_LSO : + BNA_TXQ_WI_SEND)); + + if (bnad->vlan_grp && vlan_tx_tag_present(skb)) { + vlan_tag = (u16) vlan_tx_tag_get(skb); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + if (test_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags)) { + vlan_tag = + (tcb->priority & 0x7) << 13 | (vlan_tag & 0x1fff); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + + txqent->hdr.wi.vlan_tag = htons(vlan_tag); + + if (skb_is_gso(skb)) { + err = bnad_tso_prepare(bnad, skb); + if (err) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + txqent->hdr.wi.lso_mss = htons(skb_is_gso(skb)); + flags |= (BNA_TXQ_WI_CF_IP_CKSUM | BNA_TXQ_WI_CF_TCP_CKSUM); + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (tcp_hdrlen(skb) >> 2, + skb_transport_offset(skb))); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + u8 proto = 0; + + txqent->hdr.wi.lso_mss = 0; + + if (skb->protocol == htons(ETH_P_IP)) + proto = ip_hdr(skb)->protocol; + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* nexthdr may not be TCP immediately. 
*/ + proto = ipv6_hdr(skb)->nexthdr; + } + if (proto == IPPROTO_TCP) { + flags |= BNA_TXQ_WI_CF_TCP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, tcpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + tcp_hdrlen(skb))); + + } else if (proto == IPPROTO_UDP) { + flags |= BNA_TXQ_WI_CF_UDP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, udpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + + sizeof(struct udphdr))); + } else { + err = skb_checksum_help(skb); + BNAD_UPDATE_CTR(bnad, csum_help); + if (err) { + dev_kfree_skb(skb); + BNAD_UPDATE_CTR(bnad, csum_help_err); + return NETDEV_TX_OK; + } + } + } else { + txqent->hdr.wi.lso_mss = 0; + txqent->hdr.wi.l4_hdr_size_n_offset = 0; + } + + txqent->hdr.wi.flags = htons(flags); + + txqent->hdr.wi.frame_length = htonl(skb->len); + + unmap_q->unmap_array[unmap_prod].skb = skb; + BUG_ON(!(skb_headlen(skb) <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(skb_headlen(skb)); + dma_addr = pci_map_single(bnad->pcidev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + u32 size = frag->size; + + if (++vect_id == BFI_TX_MAX_VECTORS_PER_WI) { + vect_id = 0; + if (--wi_range) + txqent++; + else { + BNA_QE_INDX_ADD(txq_prod, wis_used, + tcb->q_depth); + wis_used = 0; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, + txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + } + wis_used++; + txqent->hdr.wi_ext.opcode = htons(BNA_TXQ_WI_EXTENSION); + } + + BUG_ON(!(size <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(size); + dma_addr = + pci_map_page(bnad->pcidev, frag->page, + frag->page_offset, size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + } + + unmap_q->producer_index = unmap_prod; + BNA_QE_INDX_ADD(txq_prod, wis_used, tcb->q_depth); + tcb->producer_index = txq_prod; + + smp_mb(); + bna_txq_prod_indx_doorbell(tcb); + + if ((u16) (*tcb->hw_consumer_index) != tcb->consumer_index) + tasklet_schedule(&bnad->tx_free_tasklet); + + return NETDEV_TX_OK; +} + +/* + * bna_lock is used to synchronize reading of the stats structures, which + * are written by BNA under the same lock.
+ */ +static struct net_device_stats * +bnad_get_netdev_stats(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return &bnad->net_stats; +} + +static void +bnad_set_rx_mode(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + u32 new_mask, valid_mask; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + new_mask = valid_mask = 0; + + if (netdev->flags & IFF_PROMISC) { + if (!(bnad->cfg_flags & BNAD_CF_PROMISC)) { + new_mask = BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags |= BNAD_CF_PROMISC; + } + } else { + if (bnad->cfg_flags & BNAD_CF_PROMISC) { + new_mask = ~BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags &= ~BNAD_CF_PROMISC; + } + } + + if (netdev->flags & IFF_ALLMULTI) { + if (!(bnad->cfg_flags & BNAD_CF_ALLMULTI)) { + new_mask |= BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags |= BNAD_CF_ALLMULTI; + } + } else { + if (bnad->cfg_flags & BNAD_CF_ALLMULTI) { + new_mask &= ~BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags &= ~BNAD_CF_ALLMULTI; + } + } + + bna_rx_mode_set(bnad->rx_info[0].rx, new_mask, valid_mask, NULL); + + if (!netdev_mc_empty(netdev)) { + u8 *mcaddr_list; + int mc_count = netdev_mc_count(netdev); + + /* Index 0 holds the broadcast address */ + mcaddr_list = + kzalloc((mc_count + 1) * ETH_ALEN, + GFP_ATOMIC); + if (!mcaddr_list) + goto unlock; /* don't leak bna_lock on allocation failure */ + + memcpy(&mcaddr_list[0], &bnad_bcast_addr[0], ETH_ALEN); + + /* Copy rest of the MC addresses */ + bnad_netdev_mc_list_get(netdev, mcaddr_list); + + bna_rx_mcast_listset(bnad->rx_info[0].rx, mc_count + 1, + mcaddr_list, NULL); + + /* Should we enable BNAD_CF_ALLMULTI for err != 0 ? */ + kfree(mcaddr_list); + } +unlock: + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* + * bna_lock is used to sync writes to netdev->addr + * conf_lock cannot be used since this call may be made + * in a non-blocking context.
+ */ +static int +bnad_set_mac_address(struct net_device *netdev, void *mac_addr) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct sockaddr *sa = (struct sockaddr *)mac_addr; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + err = bnad_mac_addr_set_locked(bnad, sa->sa_data); + + if (!err) + memcpy(netdev->dev_addr, sa->sa_data, netdev->addr_len); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return err; +} + +static int +bnad_change_mtu(struct net_device *netdev, int new_mtu) +{ + int mtu, err = 0; + unsigned long flags; + + struct bnad *bnad = netdev_priv(netdev); + + if (new_mtu + ETH_HLEN < ETH_ZLEN || new_mtu > BNAD_JUMBO_MTU) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + + netdev->mtu = new_mtu; + + mtu = ETH_HLEN + new_mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_vlan_rx_register(struct net_device *netdev, + struct vlan_group *vlan_grp) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->vlan_grp = vlan_grp; + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_add_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_add(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_kill_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_del(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void +bnad_netpoll(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + u32 curr_mask; + int i, j; + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + bna_intx_disable(&bnad->bna, curr_mask); + bnad_isr(bnad->pcidev->irq, netdev); + bna_intx_enable(&bnad->bna, curr_mask); + } else { + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) { + bnad_disable_rx_irq(bnad, + rx_ctrl->ccb); + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } + } + } +} +#endif + +static const struct net_device_ops bnad_netdev_ops = { + .ndo_open = bnad_open, + .ndo_stop = bnad_stop, + .ndo_start_xmit = bnad_start_xmit, + .ndo_get_stats = bnad_get_netdev_stats, + .ndo_set_rx_mode = bnad_set_rx_mode, + .ndo_set_multicast_list = bnad_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = bnad_set_mac_address, + .ndo_change_mtu = bnad_change_mtu, + .ndo_vlan_rx_register = bnad_vlan_rx_register, + .ndo_vlan_rx_add_vid = bnad_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = bnad_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bnad_netpoll +#endif +}; + +static void +bnad_netdev_init(struct bnad *bnad, bool using_dac) +{ + struct net_device *netdev = 
bnad->netdev; + + netdev->features |= NETIF_F_IPV6_CSUM; + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + + netdev->features |= NETIF_F_GRO; + pr_warn("bna: GRO enabled, using kernel stack GRO\n"); + + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + + if (using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + netdev->features |= + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + + netdev->vlan_features = netdev->features; + netdev->mem_start = bnad->mmio_start; + netdev->mem_end = bnad->mmio_start + bnad->mmio_len - 1; + + netdev->netdev_ops = &bnad_netdev_ops; + bnad_set_ethtool_ops(netdev); +} + +/* + * 1. Initialize the bnad structure + * 2. Setup netdev pointer in pci_dev + * 3. Initialize Tx free tasklet + * 4. Initialize no. of TxQ & CQs & MSIX vectors + */ +static int +bnad_init(struct bnad *bnad, + struct pci_dev *pdev, struct net_device *netdev) +{ + unsigned long flags; + + SET_NETDEV_DEV(netdev, &pdev->dev); + pci_set_drvdata(pdev, netdev); + + bnad->netdev = netdev; + bnad->pcidev = pdev; + bnad->mmio_start = pci_resource_start(pdev, 0); + bnad->mmio_len = pci_resource_len(pdev, 0); + bnad->bar0 = ioremap_nocache(bnad->mmio_start, bnad->mmio_len); + if (!bnad->bar0) { + dev_err(&pdev->dev, "ioremap for bar0 failed\n"); + pci_set_drvdata(pdev, NULL); + return -ENOMEM; + } + pr_info("bar0 mapped to %p, len %llu\n", bnad->bar0, + (unsigned long long) bnad->mmio_len); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!bnad_msix_disable) + bnad->cfg_flags = BNAD_CF_MSIX; + + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + bnad->msix_diag_num = 2; /* 1 for Tx, 1 for Rx */ + + bnad->txq_depth = BNAD_TXQ_DEPTH; + bnad->rxq_depth = BNAD_RXQ_DEPTH; + bnad->rx_csum = true; + + bnad->tx_coalescing_timeo = BFI_TX_COALESCING_TIMEO; + bnad->rx_coalescing_timeo = BFI_RX_COALESCING_TIMEO; + + tasklet_init(&bnad->tx_free_tasklet, bnad_tx_free_tasklet, + (unsigned long)bnad); + + return 0; +} + +/* + * Must be called after bnad_pci_uninit() + * so that iounmap() and pci_set_drvdata(NULL) + * happen only after PCI uninitialization.
+ */ +static void +bnad_uninit(struct bnad *bnad) +{ + if (bnad->bar0) + iounmap(bnad->bar0); + pci_set_drvdata(bnad->pcidev, NULL); +} + +/* + * Initialize locks + a) Per-device mutex used for serializing configuration + changes from OS interface + b) spin lock used to protect bna state machine + */ +static void +bnad_lock_init(struct bnad *bnad) +{ + spin_lock_init(&bnad->bna_lock); + mutex_init(&bnad->conf_mutex); +} + +static void +bnad_lock_uninit(struct bnad *bnad) +{ + mutex_destroy(&bnad->conf_mutex); +} + +/* PCI Initialization */ +static int +bnad_pci_init(struct bnad *bnad, + struct pci_dev *pdev, bool *using_dac) +{ + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + err = pci_request_regions(pdev, BNAD_NAME); + if (err) + goto disable_device; + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + *using_dac = 1; + } else { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + err = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + if (err) + goto release_regions; + } + *using_dac = 0; + } + pci_set_master(pdev); + return 0; + +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); + + return err; +} + +static void +bnad_pci_uninit(struct pci_dev *pdev) +{ + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static int __devinit +bnad_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pcidev_id) +{ + bool using_dac; + int err; + struct bnad *bnad; + struct bna *bna; + struct net_device *netdev; + struct bfa_pcidev pcidev_info; + unsigned long flags; + + pr_info("bnad_pci_probe : (0x%p, 0x%p) PCI Func : (%d)\n", + pdev, pcidev_id, PCI_FUNC(pdev->devfn)); + + mutex_lock(&bnad_fwimg_mutex); + if (!cna_get_firmware_buf(pdev)) { + mutex_unlock(&bnad_fwimg_mutex); + pr_warn("Failed to load Firmware Image!\n"); + return -ENODEV; + } + mutex_unlock(&bnad_fwimg_mutex); + + /* + * Allocates sizeof(struct net_device + struct bnad) + * bnad = netdev->priv + */ + netdev = alloc_etherdev(sizeof(struct bnad)); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev failed\n"); + err = -ENOMEM; + return err; + } + bnad = netdev_priv(netdev); + + /* + * PCI initialization + * Output : using_dac = 1 for 64 bit DMA + * = 0 for 32 bit DMA + */ + err = bnad_pci_init(bnad, pdev, &using_dac); + if (err) + goto free_netdev; + + bnad_lock_init(bnad); + /* + * Initialize bnad structure + * Setup relation between pci_dev & netdev + * Init Tx free tasklet + */ + err = bnad_init(bnad, pdev, netdev); + if (err) + goto pci_uninit; + /* Initialize netdev structure, set up ethtool ops */ + bnad_netdev_init(bnad, using_dac); + + bnad_enable_msix(bnad); + + /* Get resource requirement from bna */ + bna_res_req(&bnad->res_info[0]); + + /* Allocate resources from bna */ + err = bnad_res_alloc(bnad); + if (err) + goto free_netdev; + + bna = &bnad->bna; + + /* Setup pcidev_info for bna_init() */ + pcidev_info.pci_slot = PCI_SLOT(bnad->pcidev->devfn); + pcidev_info.pci_func = PCI_FUNC(bnad->pcidev->devfn); + pcidev_info.device_id = bnad->pcidev->device; + pcidev_info.pci_bar_kva = bnad->bar0; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_init(bna, bnad, &pcidev_info, &bnad->res_info[0]); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->stats.bna_stats = &bna->stats; + + /* Set up timers */ + setup_timer(&bnad->bna.device.ioc.ioc_timer, bnad_ioc_timeout, + ((unsigned long)bnad)); +
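/* The IOC runs off three timers: ioc_timer drives the IOC state machine, hb_timer (below) checks the f/w heartbeat, and sem_timer retries the h/w semaphore (see bnad_ioc_sem_timeout() above) */ +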
setup_timer(&bnad->bna.device.ioc.hb_timer, bnad_ioc_hb_check, + ((unsigned long)bnad)); + setup_timer(&bnad->bna.device.ioc.sem_timer, bnad_ioc_sem_timeout, + ((unsigned long)bnad)); + + /* Now start the timer before calling IOC */ + mod_timer(&bnad->bna.device.ioc.ioc_timer, + jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ)); + + /* + * Start the chip + * Don't care even if err != 0, bna state machine will + * deal with it + */ + err = bnad_device_enable(bnad); + + /* Get the burnt-in mac */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mac_get(&bna->port, &bnad->perm_addr); + bnad_set_netdev_perm_addr(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + + /* + * Make sure the link appears down to the stack + */ + netif_carrier_off(netdev); + + /* Finally, register with net_device layer */ + err = register_netdev(netdev); + if (err) { + pr_err("BNA : Registering with netdev failed\n"); + goto disable_device; + } + + return 0; + +disable_device: + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); +pci_uninit: + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); +free_netdev: + free_netdev(netdev); + return err; +} + +static void __devexit +bnad_pci_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct bnad *bnad; + struct bna *bna; + unsigned long flags; + + if (!netdev) + return; + + pr_info("%s bnad_pci_remove\n", netdev->name); + bnad = netdev_priv(netdev); + bna = &bnad->bna; + + unregister_netdev(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); + free_netdev(netdev); +} + +const struct pci_device_id bnad_pci_id_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_BROCADE, + PCI_DEVICE_ID_BROCADE_CT), + .class = PCI_CLASS_NETWORK_ETHERNET << 8, + .class_mask = 0xffff00 + }, {0, } +}; + +MODULE_DEVICE_TABLE(pci, bnad_pci_id_table); + +static struct pci_driver bnad_pci_driver = { + .name = BNAD_NAME, + .id_table = bnad_pci_id_table, + .probe = bnad_pci_probe, + .remove = __devexit_p(bnad_pci_remove), +}; + +static int __init +bnad_module_init(void) +{ + int err; + + pr_info("Brocade 10G Ethernet driver\n"); + + bfa_ioc_auto_recover(bnad_ioc_auto_recover); + + err = pci_register_driver(&bnad_pci_driver); + if (err < 0) { + pr_err("bna : PCI registration failed in module init " + "(%d)\n", err); + return err; + } + + return 0; +} + +static void __exit +bnad_module_exit(void) +{ + pci_unregister_driver(&bnad_pci_driver); + + if (bfi_fw) + release_firmware(bfi_fw); +} + +module_init(bnad_module_init); +module_exit(bnad_module_exit); + +MODULE_AUTHOR("Brocade"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Brocade 10G PCIe Ethernet driver"); +MODULE_VERSION(BNAD_VERSION);
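+/* Advertise the firmware image in module info so userspace tooling can locate and bundle it */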
+MODULE_FIRMWARE(CNA_FW_FILE_CT); diff --git a/drivers/net/bna/bnad.h b/drivers/net/bna/bnad.h new file mode 100644 index 000000000000..3261401e35cb --- /dev/null +++ b/drivers/net/bna/bnad.h @@ -0,0 +1,334 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BNAD_H__ +#define __BNAD_H__ + +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/ipv6.h> +#include <linux/etherdevice.h> +#include <linux/mutex.h> +#include <linux/firmware.h> + +/* Fix for IA64 */ +#include <asm/checksum.h> +#include <net/ip6_checksum.h> + +#include <net/ip.h> +#include <net/tcp.h> + +#include "bna.h" + +#define BNAD_TXQ_DEPTH 2048 +#define BNAD_RXQ_DEPTH 2048 + +#define BNAD_MAX_TXS 1 +#define BNAD_MAX_TXQ_PER_TX 8 /* 8 priority queues */ +#define BNAD_TXQ_NUM 1 + +#define BNAD_MAX_RXS 1 +#define BNAD_MAX_RXPS_PER_RX 16 + +/* + * Control structure pointed to by ccb->ctrl, which + * determines the NAPI / LRO behavior of the CCB + * There is a 1:1 correspondence between ccb & ctrl + */ +struct bnad_rx_ctrl { + struct bna_ccb *ccb; + struct napi_struct napi; +}; + +#define BNAD_RXMODE_PROMISC_DEFAULT BNA_RXMODE_PROMISC + +#define BNAD_GET_TX_ID(_skb) (0) + +/* + * GLOBAL #defines (CONSTANTS) + */ +#define BNAD_NAME "bna" +#define BNAD_NAME_LEN 64 + +#define BNAD_VERSION "2.3.2.0" + +#define BNAD_MAILBOX_MSIX_VECTORS 1 + +#define BNAD_STATS_TIMER_FREQ 1000 /* in msecs */ +#define BNAD_DIM_TIMER_FREQ 1000 /* in msecs */ + +#define BNAD_MAX_Q_DEPTH 0x10000 +#define BNAD_MIN_Q_DEPTH 0x200 + +#define BNAD_JUMBO_MTU 9000 + +#define BNAD_NETIF_WAKE_THRESHOLD 8 + +#define BNAD_RXQ_REFILL_THRESHOLD_SHIFT 3 + +/* Bit positions for tcb->flags */ +#define BNAD_TXQ_FREE_SENT 0 + +/* Bit positions for rcb->flags */ +#define BNAD_RXQ_REFILL 0 +#define BNAD_RXQ_STARTED 1 + +/* + * DATA STRUCTURES + */ + +/* enums */ +enum bnad_intr_source { + BNAD_INTR_TX = 1, + BNAD_INTR_RX = 2 +}; + +enum bnad_link_state { + BNAD_LS_DOWN = 0, + BNAD_LS_UP = 1 +}; + +struct bnad_completion { + struct completion ioc_comp; + struct completion ucast_comp; + struct completion mcast_comp; + struct completion tx_comp; + struct completion rx_comp; + struct completion stats_comp; + struct completion port_comp; + + u8 ioc_comp_status; + u8 ucast_comp_status; + u8 mcast_comp_status; + u8 tx_comp_status; + u8 rx_comp_status; + u8 stats_comp_status; + u8 port_comp_status; +}; + +/* Tx Rx Control Stats */ +struct bnad_drv_stats { + u64 netif_queue_stop; + u64 netif_queue_wakeup; + u64 tso4; + u64 tso6; + u64 tso_err; + u64 tcpcsum_offload; + u64 udpcsum_offload; + u64 csum_help; + u64 csum_help_err; + + u64 hw_stats_updates; + u64 netif_rx_schedule; + u64 netif_rx_complete; + u64 netif_rx_dropped; + + u64 link_toggle; + u64 cee_up; + + u64 rxp_info_alloc_failed; + u64 mbox_intr_disabled; + u64 mbox_intr_enabled; + u64 tx_unmap_q_alloc_failed; + u64 rx_unmap_q_alloc_failed; + + u64 rxbuf_alloc_failed; +}; + +/* Complete driver stats */ +struct bnad_stats { + struct bnad_drv_stats drv_stats; + struct bna_stats *bna_stats; +}; + +/* Tx / Rx Resources */ +struct bnad_tx_res_info { + struct bna_res_info res_info[BNA_TX_RES_T_MAX]; +}; +
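+/* res_info[] entries in these per-object holders are indexed by the BNA_TX_RES_* / BNA_RX_RES_* enums used in bnad_setup_tx() / bnad_setup_rx() */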
+struct bnad_rx_res_info { + struct bna_res_info res_info[BNA_RX_RES_T_MAX]; +}; + +struct bnad_tx_info { + struct bna_tx *tx; /* 1:1 between tx_info & tx */ + struct bna_tcb *tcb[BNAD_MAX_TXQ_PER_TX]; +} ____cacheline_aligned; + +struct bnad_rx_info { + struct bna_rx *rx; /* 1:1 between rx_info & rx */ + + struct bnad_rx_ctrl rx_ctrl[BNAD_MAX_RXPS_PER_RX]; +} ____cacheline_aligned; + +/* Unmap queues for Tx / Rx cleanup */ +struct bnad_skb_unmap { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr) +}; + +struct bnad_unmap_q { + u32 producer_index; + u32 consumer_index; + u32 q_depth; + /* This should be the last one */ + struct bnad_skb_unmap unmap_array[1]; +}; + +/* Bit mask values for bnad->cfg_flags */ +#define BNAD_CF_DIM_ENABLED 0x01 /* DIM */ +#define BNAD_CF_PROMISC 0x02 +#define BNAD_CF_ALLMULTI 0x04 +#define BNAD_CF_MSIX 0x08 /* If in MSIx mode */ + +/* Defines for run_flags bit-mask */ +/* Set, tested & cleared using xxx_bit() functions */ +/* Values indicate bit positions */ +#define BNAD_RF_CEE_RUNNING 1 +#define BNAD_RF_HW_ERROR 2 +#define BNAD_RF_MBOX_IRQ_DISABLED 3 +#define BNAD_RF_TX_STARTED 4 +#define BNAD_RF_RX_STARTED 5 +#define BNAD_RF_DIM_TIMER_RUNNING 6 +#define BNAD_RF_STATS_TIMER_RUNNING 7 + +struct bnad { + struct net_device *netdev; + + /* Data path */ + struct bnad_tx_info tx_info[BNAD_MAX_TXS]; + struct bnad_rx_info rx_info[BNAD_MAX_RXS]; + + struct vlan_group *vlan_grp; + /* + * These q numbers are global only because + * they are used to calculate MSI-X vectors. + * Actually the exact # of queues is per Tx/Rx + * object. + */ + u32 num_tx; + u32 num_rx; + u32 num_txq_per_tx; + u32 num_rxp_per_rx; + + u32 txq_depth; + u32 rxq_depth; + + u8 tx_coalescing_timeo; + u8 rx_coalescing_timeo; + + struct bna_rx_config rx_config[BNAD_MAX_RXS]; + struct bna_tx_config tx_config[BNAD_MAX_TXS]; + + u32 rx_csum; + + void __iomem *bar0; /* BAR0 address */ + + struct bna bna; + + u32 cfg_flags; + unsigned long run_flags; + + struct pci_dev *pcidev; + u64 mmio_start; + u64 mmio_len; + + u32 msix_num; + u32 msix_diag_num; + struct msix_entry *msix_table; + + struct mutex conf_mutex; + spinlock_t bna_lock ____cacheline_aligned; + + /* Timers */ + struct timer_list ioc_timer; + struct timer_list dim_timer; + struct timer_list stats_timer; + + /* Control path resources, memory & irq */ + struct bna_res_info res_info[BNA_RES_T_MAX]; + struct bnad_tx_res_info tx_res_info[BNAD_MAX_TXS]; + struct bnad_rx_res_info rx_res_info[BNAD_MAX_RXS]; + + struct bnad_completion bnad_completions; + + /* Burnt in MAC address */ + mac_t perm_addr; + + struct tasklet_struct tx_free_tasklet; + + /* Statistics */ + struct bnad_stats stats; + struct net_device_stats net_stats; + + struct bnad_diag *diag; + + char adapter_name[BNAD_NAME_LEN]; + char port_name[BNAD_NAME_LEN]; + char mbox_irq_name[BNAD_NAME_LEN]; +}; + +/* + * EXTERN VARIABLES + */ +extern struct firmware *bfi_fw; +extern u32 bnad_rxqs_per_cq; + +/* + * EXTERN PROTOTYPES + */ +extern u32 *cna_get_firmware_buf(struct pci_dev *pdev); +/* Netdev entry point prototypes */ +extern void bnad_set_ethtool_ops(struct net_device *netdev); + +/* Configuration & setup */ +extern void bnad_tx_coalescing_timeo_set(struct bnad *bnad); +extern void bnad_rx_coalescing_timeo_set(struct bnad *bnad); + +extern int bnad_setup_rx(struct bnad *bnad, uint rx_id); +extern int bnad_setup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_rx(struct bnad *bnad, uint rx_id); + +/* Timer
start/stop protos */ +extern void bnad_dim_timer_start(struct bnad *bnad); + +/* Statistics */ +extern void bnad_netdev_qstats_fill(struct bnad *bnad); +extern void bnad_netdev_hwstats_fill(struct bnad *bnad); + +/* + * MACROS + */ +/* To set & get the stats counters */ +#define BNAD_UPDATE_CTR(_bnad, _ctr) \ + (((_bnad)->stats.drv_stats._ctr)++) + +#define BNAD_GET_CTR(_bnad, _ctr) ((_bnad)->stats.drv_stats._ctr) + +#define bnad_enable_rx_irq_unsafe(_ccb) \ +{ \ + bna_ib_coalescing_timer_set((_ccb)->i_dbell, \ + (_ccb)->rx_coalescing_timeo); \ + bna_ib_ack((_ccb)->i_dbell, 0); \ +} + +#define bnad_dim_timer_running(_bnad) \ + (((_bnad)->cfg_flags & BNAD_CF_DIM_ENABLED) && \ + (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &((_bnad)->run_flags)))) + +#endif /* __BNAD_H__ */ diff --git a/drivers/net/bna/bnad_ethtool.c b/drivers/net/bna/bnad_ethtool.c new file mode 100644 index 000000000000..e982785b6b25 --- /dev/null +++ b/drivers/net/bna/bnad_ethtool.c @@ -0,0 +1,1282 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "cna.h" + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/ethtool.h> +#include <linux/rtnetlink.h> + +#include "bna.h" + +#include "bnad.h" + +#define BNAD_NUM_TXF_COUNTERS 12 +#define BNAD_NUM_RXF_COUNTERS 10 +#define BNAD_NUM_CQ_COUNTERS 3 +#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_TXQ_COUNTERS 5 + +#define BNAD_ETHTOOL_STATS_NUM \ + (sizeof(struct net_device_stats) / sizeof(unsigned long) + \ + sizeof(struct bnad_drv_stats) / sizeof(u64) + \ + offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64)) + +static char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_errors", + "tx_errors", + "rx_dropped", + "tx_dropped", + "multicast", + "collisions", + + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_frame_errors", + "rx_fifo_errors", + "rx_missed_errors", + + "tx_aborted_errors", + "tx_carrier_errors", + "tx_fifo_errors", + "tx_heartbeat_errors", + "tx_window_errors", + + "rx_compressed", + "tx_compressed", + + "netif_queue_stop", + "netif_queue_wakeup", + "tso4", + "tso6", + "tso_err", + "tcpcsum_offload", + "udpcsum_offload", + "csum_help", + "csum_help_err", + "hw_stats_updates", + "netif_rx_schedule", + "netif_rx_complete", + "netif_rx_dropped", + + "link_toggle", + "cee_up", + + "rxp_info_alloc_failed", + "mbox_intr_disabled", + "mbox_intr_enabled", + "tx_unmap_q_alloc_failed", + "rx_unmap_q_alloc_failed", + "rxbuf_alloc_failed", + + "mac_frame_64", + "mac_frame_65_127", + "mac_frame_128_255", + "mac_frame_256_511", + "mac_frame_512_1023", + "mac_frame_1024_1518", + "mac_frame_1518_1522", + "mac_rx_bytes", + "mac_rx_packets", + "mac_rx_fcs_error", + "mac_rx_multicast", + "mac_rx_broadcast", + "mac_rx_control_frames", + "mac_rx_pause", + "mac_rx_unknown_opcode", + "mac_rx_alignment_error", + "mac_rx_frame_length_error", + "mac_rx_code_error", + "mac_rx_carrier_sense_error", + "mac_rx_undersize", + "mac_rx_oversize", + "mac_rx_fragments",
+ "mac_rx_jabber", + "mac_rx_drop", + + "mac_tx_bytes", + "mac_tx_packets", + "mac_tx_multicast", + "mac_tx_broadcast", + "mac_tx_pause", + "mac_tx_deferral", + "mac_tx_excessive_deferral", + "mac_tx_single_collision", + "mac_tx_muliple_collision", + "mac_tx_late_collision", + "mac_tx_excessive_collision", + "mac_tx_total_collision", + "mac_tx_pause_honored", + "mac_tx_drop", + "mac_tx_jabber", + "mac_tx_fcs_error", + "mac_tx_control_frame", + "mac_tx_oversize", + "mac_tx_undersize", + "mac_tx_fragments", + + "bpc_tx_pause_0", + "bpc_tx_pause_1", + "bpc_tx_pause_2", + "bpc_tx_pause_3", + "bpc_tx_pause_4", + "bpc_tx_pause_5", + "bpc_tx_pause_6", + "bpc_tx_pause_7", + "bpc_tx_zero_pause_0", + "bpc_tx_zero_pause_1", + "bpc_tx_zero_pause_2", + "bpc_tx_zero_pause_3", + "bpc_tx_zero_pause_4", + "bpc_tx_zero_pause_5", + "bpc_tx_zero_pause_6", + "bpc_tx_zero_pause_7", + "bpc_tx_first_pause_0", + "bpc_tx_first_pause_1", + "bpc_tx_first_pause_2", + "bpc_tx_first_pause_3", + "bpc_tx_first_pause_4", + "bpc_tx_first_pause_5", + "bpc_tx_first_pause_6", + "bpc_tx_first_pause_7", + + "bpc_rx_pause_0", + "bpc_rx_pause_1", + "bpc_rx_pause_2", + "bpc_rx_pause_3", + "bpc_rx_pause_4", + "bpc_rx_pause_5", + "bpc_rx_pause_6", + "bpc_rx_pause_7", + "bpc_rx_zero_pause_0", + "bpc_rx_zero_pause_1", + "bpc_rx_zero_pause_2", + "bpc_rx_zero_pause_3", + "bpc_rx_zero_pause_4", + "bpc_rx_zero_pause_5", + "bpc_rx_zero_pause_6", + "bpc_rx_zero_pause_7", + "bpc_rx_first_pause_0", + "bpc_rx_first_pause_1", + "bpc_rx_first_pause_2", + "bpc_rx_first_pause_3", + "bpc_rx_first_pause_4", + "bpc_rx_first_pause_5", + "bpc_rx_first_pause_6", + "bpc_rx_first_pause_7", + + "rad_rx_frames", + "rad_rx_octets", + "rad_rx_vlan_frames", + "rad_rx_ucast", + "rad_rx_ucast_octets", + "rad_rx_ucast_vlan", + "rad_rx_mcast", + "rad_rx_mcast_octets", + "rad_rx_mcast_vlan", + "rad_rx_bcast", + "rad_rx_bcast_octets", + "rad_rx_bcast_vlan", + "rad_rx_drops", + + "fc_rx_ucast_octets", + "fc_rx_ucast", + "fc_rx_ucast_vlan", + "fc_rx_mcast_octets", + "fc_rx_mcast", + "fc_rx_mcast_vlan", + "fc_rx_bcast_octets", + "fc_rx_bcast", + "fc_rx_bcast_vlan", + + "fc_tx_ucast_octets", + "fc_tx_ucast", + "fc_tx_ucast_vlan", + "fc_tx_mcast_octets", + "fc_tx_mcast", + "fc_tx_mcast_vlan", + "fc_tx_bcast_octets", + "fc_tx_bcast", + "fc_tx_bcast_vlan", + "fc_tx_parity_errors", + "fc_tx_timeout", + "fc_tx_fid_parity_errors", +}; + +static int +bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + cmd->supported = SUPPORTED_10000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + cmd->autoneg = AUTONEG_DISABLE; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + cmd->port = PORT_FIBRE; + cmd->phy_address = 0; + + if (netif_carrier_ok(netdev)) { + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + } else { + cmd->speed = -1; + cmd->duplex = -1; + } + cmd->transceiver = XCVR_EXTERNAL; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + + return 0; +} + +static int +bnad_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + /* 10G full duplex setting supported only */ + if (cmd->autoneg == AUTONEG_ENABLE) + return -EOPNOTSUPP; else { + if ((cmd->speed == SPEED_10000) && (cmd->duplex == DUPLEX_FULL)) + return 0; + } + + return -EOPNOTSUPP; +} + +static void +bnad_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bfa_ioc_attr *ioc_attr; + unsigned long flags; + + strcpy(drvinfo->driver, BNAD_NAME); + strcpy(drvinfo->version, 
BNAD_VERSION); + + ioc_attr = kzalloc(sizeof(*ioc_attr), GFP_KERNEL); + if (ioc_attr) { + memset(ioc_attr, 0, sizeof(*ioc_attr)); + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_get_attr(&bnad->bna.device.ioc, ioc_attr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + strncpy(drvinfo->fw_version, ioc_attr->adapter_attr.fw_ver, + sizeof(drvinfo->fw_version) - 1); + kfree(ioc_attr); + } + + strncpy(drvinfo->bus_info, pci_name(bnad->pcidev), ETHTOOL_BUSINFO_LEN); +} + +static int +get_regs(struct bnad *bnad, u32 * regs) +{ + int num = 0, i; + u32 reg_addr; + unsigned long flags; + +#define BNAD_GET_REG(addr) \ +do { \ + if (regs) \ + regs[num++] = readl(bnad->bar0 + (addr)); \ + else \ + num++; \ +} while (0) + + spin_lock_irqsave(&bnad->bna_lock, flags); + + /* DMA Block Internal Registers */ + BNAD_GET_REG(DMA_CTRL_REG0); + BNAD_GET_REG(DMA_CTRL_REG1); + BNAD_GET_REG(DMA_ERR_INT_STATUS); + BNAD_GET_REG(DMA_ERR_INT_ENABLE); + BNAD_GET_REG(DMA_ERR_INT_STATUS_SET); + + /* APP Block Register Address Offset from BAR0 */ + BNAD_GET_REG(HOSTFN0_INT_STATUS); + BNAD_GET_REG(HOSTFN0_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN0); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN0); + BNAD_GET_REG(FN0_PCIE_ERR_REG); + BNAD_GET_REG(FN0_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN0_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN1_INT_STATUS); + BNAD_GET_REG(HOSTFN1_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN1); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN1); + BNAD_GET_REG(FN1_PCIE_ERR_REG); + BNAD_GET_REG(FN1_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN1_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(PCIE_MISC_REG); + + BNAD_GET_REG(HOST_SEM0_REG); + BNAD_GET_REG(HOST_SEM1_REG); + BNAD_GET_REG(HOST_SEM2_REG); + BNAD_GET_REG(HOST_SEM3_REG); + BNAD_GET_REG(HOST_SEM0_INFO_REG); + BNAD_GET_REG(HOST_SEM1_INFO_REG); + BNAD_GET_REG(HOST_SEM2_INFO_REG); + BNAD_GET_REG(HOST_SEM3_INFO_REG); + + BNAD_GET_REG(TEMPSENSE_CNTL_REG); + BNAD_GET_REG(TEMPSENSE_STAT_REG); + + BNAD_GET_REG(APP_LOCAL_ERR_STAT); + BNAD_GET_REG(APP_LOCAL_ERR_MSK); + + BNAD_GET_REG(PCIE_LNK_ERR_STAT); + BNAD_GET_REG(PCIE_LNK_ERR_MSK); + + BNAD_GET_REG(FCOE_FIP_ETH_TYPE); + BNAD_GET_REG(RESV_ETH_TYPE); + + BNAD_GET_REG(HOSTFN2_INT_STATUS); + BNAD_GET_REG(HOSTFN2_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN2); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN2); + BNAD_GET_REG(FN2_PCIE_ERR_REG); + BNAD_GET_REG(FN2_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN2_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN3_INT_STATUS); + BNAD_GET_REG(HOSTFN3_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN3); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN3); + BNAD_GET_REG(FN3_PCIE_ERR_REG); + BNAD_GET_REG(FN3_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN3_ERR_TYPE_MSK_STATUS_REG); + + /* Host Command Status Registers */ + reg_addr = HOST_CMDSTS0_CLR_REG; + for (i = 0; i < 16; i++) { + BNAD_GET_REG(reg_addr); + BNAD_GET_REG(reg_addr + 4); + BNAD_GET_REG(reg_addr + 8); + reg_addr += 0x10; + } + + /* Function ID register */ + BNAD_GET_REG(FNC_ID_REG); + + /* Function personality register */ + BNAD_GET_REG(FNC_PERS_REG); + + /* Operation mode register */ + BNAD_GET_REG(OP_MODE); + + /* LPU0 Registers */ + BNAD_GET_REG(LPU0_MBOX_CTL_REG); + BNAD_GET_REG(LPU0_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_0REG); + BNAD_GET_REG(LPU1_MBOX_LINK_0REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU0_ERR_STATUS_REG); + BNAD_GET_REG(LPU0_ERR_SET_REG); + + /* LPU1 Registers */ + BNAD_GET_REG(LPU1_MBOX_CTL_REG); + BNAD_GET_REG(LPU1_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_1REG); 
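/* Editor's note, not part of the patch: BNAD_GET_REG above is a
 * count-or-fill macro -- when get_regs() is called with regs == NULL it
 * only advances num, so bnad_get_regs_len() further below can size the
 * snapshot buffer through the same code path that bnad_get_regs() later
 * uses to fill it. Expanded, each invocation is roughly:
 *
 *	if (regs)
 *		regs[num++] = readl(bnad->bar0 + DMA_CTRL_REG0);
 *	else
 *		num++;
 */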
+ BNAD_GET_REG(LPU1_MBOX_LINK_1REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_ERR_STATUS_REG); + BNAD_GET_REG(LPU1_ERR_SET_REG); + + /* PSS Registers */ + BNAD_GET_REG(PSS_CTL_REG); + BNAD_GET_REG(PSS_ERR_STATUS_REG); + BNAD_GET_REG(ERR_STATUS_SET); + BNAD_GET_REG(PSS_RAM_ERR_STATUS_REG); + + /* Catapult CPQ Registers */ + BNAD_GET_REG(HOSTFN0_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN0_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX1_CMD_STAT); + + /* Host Function Force Parity Error Registers */ + BNAD_GET_REG(HOSTFN0_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN1_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN2_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN3_LPU_FORCE_PERR); + + /* LL Port[0|1] Halt Mask Registers */ + BNAD_GET_REG(LL_HALT_MSK_P0); + BNAD_GET_REG(LL_HALT_MSK_P1); + + /* LL Port[0|1] Error Mask Registers */ + BNAD_GET_REG(LL_ERR_MSK_P0); + BNAD_GET_REG(LL_ERR_MSK_P1); + + /* EMC FLI Registers */ + BNAD_GET_REG(FLI_CMD_REG); + BNAD_GET_REG(FLI_ADDR_REG); + BNAD_GET_REG(FLI_CTL_REG); + BNAD_GET_REG(FLI_WRDATA_REG); + BNAD_GET_REG(FLI_RDDATA_REG); + BNAD_GET_REG(FLI_DEV_STATUS_REG); + BNAD_GET_REG(FLI_SIG_WD_REG); + + BNAD_GET_REG(FLI_DEV_VENDOR_REG); + BNAD_GET_REG(FLI_ERR_STATUS_REG); + + /* RxAdm 0 Registers */ + BNAD_GET_REG(RAD0_CTL_REG); + BNAD_GET_REG(RAD0_PE_PARM_REG); + BNAD_GET_REG(RAD0_BCN_REG); + BNAD_GET_REG(RAD0_DEFAULT_REG); + BNAD_GET_REG(RAD0_PROMISC_REG); + BNAD_GET_REG(RAD0_BCNQ_REG); + BNAD_GET_REG(RAD0_DEFAULTQ_REG); + + BNAD_GET_REG(RAD0_ERR_STS); + BNAD_GET_REG(RAD0_SET_ERR_STS); + BNAD_GET_REG(RAD0_ERR_INT_EN); + BNAD_GET_REG(RAD0_FIRST_ERR); + BNAD_GET_REG(RAD0_FORCE_ERR); + + BNAD_GET_REG(RAD0_MAC_MAN_1H); + BNAD_GET_REG(RAD0_MAC_MAN_1L); + BNAD_GET_REG(RAD0_MAC_MAN_2H); + BNAD_GET_REG(RAD0_MAC_MAN_2L); + BNAD_GET_REG(RAD0_MAC_MAN_3H); + BNAD_GET_REG(RAD0_MAC_MAN_3L); + BNAD_GET_REG(RAD0_MAC_MAN_4H); + BNAD_GET_REG(RAD0_MAC_MAN_4L); + + BNAD_GET_REG(RAD0_LAST4_IP); + + /* RxAdm 1 Registers */ + BNAD_GET_REG(RAD1_CTL_REG); + BNAD_GET_REG(RAD1_PE_PARM_REG); + BNAD_GET_REG(RAD1_BCN_REG); + BNAD_GET_REG(RAD1_DEFAULT_REG); + BNAD_GET_REG(RAD1_PROMISC_REG); + BNAD_GET_REG(RAD1_BCNQ_REG); + 
BNAD_GET_REG(RAD1_DEFAULTQ_REG); + + BNAD_GET_REG(RAD1_ERR_STS); + BNAD_GET_REG(RAD1_SET_ERR_STS); + BNAD_GET_REG(RAD1_ERR_INT_EN); + + /* TxA0 Registers */ + BNAD_GET_REG(TXA0_CTRL_REG); + /* TxA0 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA0_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA0_TSO_IP_INFO_REG(i)); + } + + /* TxA1 Registers */ + BNAD_GET_REG(TXA1_CTRL_REG); + /* TxA1 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA1_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA1_TSO_IP_INFO_REG(i)); + } + + /* RxA Registers */ + BNAD_GET_REG(RXA0_CTL_REG); + BNAD_GET_REG(RXA1_CTL_REG); + + /* PLB0 Registers */ + BNAD_GET_REG(PLB0_ECM_TIMER_REG); + BNAD_GET_REG(PLB0_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_MAX_BC(i)); + BNAD_GET_REG(PLB0_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB0_RL_MIN_REG); + BNAD_GET_REG(PLB0_RL_MAX_REG); + BNAD_GET_REG(PLB0_EMS_ADD_REG); + + /* PLB1 Registers */ + BNAD_GET_REG(PLB1_ECM_TIMER_REG); + BNAD_GET_REG(PLB1_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_MAX_BC(i)); + BNAD_GET_REG(PLB1_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB1_RL_MIN_REG); + BNAD_GET_REG(PLB1_RL_MAX_REG); + BNAD_GET_REG(PLB1_EMS_ADD_REG); + + /* HQM Control Register */ + BNAD_GET_REG(HQM0_CTL_REG); + BNAD_GET_REG(HQM0_RXQ_STOP_SEM); + BNAD_GET_REG(HQM0_TXQ_STOP_SEM); + BNAD_GET_REG(HQM1_CTL_REG); + BNAD_GET_REG(HQM1_RXQ_STOP_SEM); + BNAD_GET_REG(HQM1_TXQ_STOP_SEM); + + /* LUT Registers */ + BNAD_GET_REG(LUT0_ERR_STS); + BNAD_GET_REG(LUT0_SET_ERR_STS); + BNAD_GET_REG(LUT1_ERR_STS); + BNAD_GET_REG(LUT1_SET_ERR_STS); + + /* TRC Registers */ + BNAD_GET_REG(TRC_CTL_REG); + BNAD_GET_REG(TRC_MODS_REG); + BNAD_GET_REG(TRC_TRGC_REG); + BNAD_GET_REG(TRC_CNT1_REG); + BNAD_GET_REG(TRC_CNT2_REG); + BNAD_GET_REG(TRC_NXTS_REG); + BNAD_GET_REG(TRC_DIRR_REG); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_TRGM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_NXTM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_STRM_REG(i)); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); +#undef BNAD_GET_REG + return num; +} +static int +bnad_get_regs_len(struct net_device *netdev) +{ + int ret = get_regs(netdev_priv(netdev), NULL) * sizeof(u32); + return ret; +} + +static void +bnad_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *buf) +{ + memset(buf, 0, bnad_get_regs_len(netdev)); + get_regs(netdev_priv(netdev), buf); +} + +static void +bnad_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wolinfo) +{ + wolinfo->supported = 0; + wolinfo->wolopts = 0; +} + +static int +bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + /* Lock rqd. to access bnad->bna_lock */ + spin_lock_irqsave(&bnad->bna_lock, flags); + coalesce->use_adaptive_rx_coalesce = + (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) ? 
true : false; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + coalesce->rx_coalesce_usecs = bnad->rx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_coalesce_usecs = bnad->tx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_max_coalesced_frames = BFI_TX_INTERPKT_COUNT; + + return 0; +} + +static int +bnad_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + int dim_timer_del = 0; + + if (coalesce->rx_coalesce_usecs == 0 || + coalesce->rx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + if (coalesce->tx_coalesce_usecs == 0 || + coalesce->tx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + /* + * Do not need to store rx_coalesce_usecs here + * Every time DIM is disabled, we can get it from the + * stack. + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + if (coalesce->use_adaptive_rx_coalesce) { + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) { + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + bnad_dim_timer_start(bnad); + } + } else { + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) { + bnad->cfg_flags &= ~BNAD_CF_DIM_ENABLED; + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) { + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, + &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + del_timer_sync(&bnad->dim_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + } + bnad_rx_coalescing_timeo_set(bnad); + } + } + if (bnad->tx_coalescing_timeo != coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->tx_coalescing_timeo = coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + bnad_tx_coalescing_timeo_set(bnad); + } + + if (bnad->rx_coalescing_timeo != coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->rx_coalescing_timeo = coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) + bnad_rx_coalescing_timeo_set(bnad); + + } + + /* Add Tx Inter-pkt DMA count? 
*/ + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + ringparam->rx_max_pending = BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_max_pending = BNAD_MAX_Q_DEPTH; + + ringparam->rx_pending = bnad->rxq_depth; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_pending = bnad->txq_depth; +} + +static int +bnad_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + int i, current_err, err = 0; + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (ringparam->rx_pending == bnad->rxq_depth && + ringparam->tx_pending == bnad->txq_depth) { + mutex_unlock(&bnad->conf_mutex); + return 0; + } + + if (ringparam->rx_pending < BNAD_MIN_Q_DEPTH || + ringparam->rx_pending > BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq || + !BNA_POWER_OF_2(ringparam->rx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + if (ringparam->tx_pending < BNAD_MIN_Q_DEPTH || + ringparam->tx_pending > BNAD_MAX_Q_DEPTH || + !BNA_POWER_OF_2(ringparam->tx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + + if (ringparam->rx_pending != bnad->rxq_depth) { + bnad->rxq_depth = ringparam->rx_pending; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + bnad_cleanup_rx(bnad, i); + current_err = bnad_setup_rx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + if (ringparam->tx_pending != bnad->txq_depth) { + bnad->txq_depth = ringparam->tx_pending; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + bnad_cleanup_tx(bnad, i); + current_err = bnad_setup_tx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + pauseparam->autoneg = 0; + pauseparam->rx_pause = bnad->bna.port.pause_config.rx_pause; + pauseparam->tx_pause = bnad->bna.port.pause_config.tx_pause; +} + +static int +bnad_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + unsigned long flags; + + if (pauseparam->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + if (pauseparam->rx_pause != bnad->bna.port.pause_config.rx_pause || + pauseparam->tx_pause != bnad->bna.port.pause_config.tx_pause) { + pause_config.rx_pause = pauseparam->rx_pause; + pause_config.tx_pause = pauseparam->tx_pause; + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static u32 +bnad_get_rx_csum(struct net_device *netdev) +{ + u32 rx_csum; + struct bnad *bnad = netdev_priv(netdev); + + rx_csum = bnad->rx_csum; + return rx_csum; +} + +static int +bnad_set_rx_csum(struct net_device *netdev, u32 rx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->rx_csum = rx_csum; + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tx_csum(struct net_device 
*netdev, u32 tx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tx_csum) { + netdev->features |= NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_IPV6_CSUM; + } else { + netdev->features &= ~NETIF_F_IP_CSUM; + netdev->features &= ~NETIF_F_IPV6_CSUM; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tso(struct net_device *netdev, u32 tso) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tso) { + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + } else { + netdev->features &= ~NETIF_F_TSO; + netdev->features &= ~NETIF_F_TSO6; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_strings(struct net_device *netdev, u32 stringset, u8 * string) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, q_num; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { + BUG_ON(!(strlen(bnad_net_stats_strings[i]) < + ETH_GSTRING_LEN)); + memcpy(string, bnad_net_stats_strings[i], + ETH_GSTRING_LEN); + string += ETH_GSTRING_LEN; + } + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "txf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_errors", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_mac_sa", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "rxf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_frame_drops", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "cq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_hw_producer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if 
(!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_packets_with_error", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + q_num++; + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, + "rxq%d_packets_with_error", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + } + + q_num = 0; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) { + sprintf(string, "txq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_hw_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + break; + + default: + break; + } + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_stats_count_locked(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, count, rxf_active_num = 0, txf_active_num = 0; + u64 bmap; + + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) + txf_active_num++; + bmap >>= 1; + } + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) + rxf_active_num++; + bmap >>= 1; + } + count = BNAD_ETHTOOL_STATS_NUM + + txf_active_num * BNAD_NUM_TXF_COUNTERS + + rxf_active_num * BNAD_NUM_RXF_COUNTERS; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + count += bnad->num_rxp_per_rx * BNAD_NUM_CQ_COUNTERS; + count += bnad->num_rxp_per_rx * BNAD_NUM_RXQ_COUNTERS; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq) + count += BNAD_NUM_RXQ_COUNTERS; + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + count += bnad->num_txq_per_tx * BNAD_NUM_TXQ_COUNTERS; + } + return count; +} + +static int +bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) +{ + int i, j; + struct bna_rcb *rcb = NULL; + struct bna_tcb *tcb = NULL; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0]->rxq) { + buf[bi++] = bnad->rx_info[i].rx_ctrl[j]. 
+ ccb->producer_index; + buf[bi++] = 0; /* ccb->consumer_index */ + buf[bi++] = *(bnad->rx_info[i].rx_ctrl[j]. + ccb->hw_producer_index); + } + } + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[0]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[0]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + } + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) + if (bnad->tx_info[i].tcb[j] && + bnad->tx_info[i].tcb[j]->txq) { + tcb = bnad->tx_info[i].tcb[j]; + buf[bi++] = tcb->txq->tx_packets; + buf[bi++] = tcb->txq->tx_bytes; + buf[bi++] = tcb->producer_index; + buf[bi++] = tcb->consumer_index; + buf[bi++] = *(tcb->hw_consumer_index); + } + } + + return bi; +} + +static void +bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, + u64 *buf) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, bi; + unsigned long *net_stats, flags; + u64 *stats64; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + if (bnad_get_stats_count_locked(netdev) != stats->n_stats) { + mutex_unlock(&bnad->conf_mutex); + return; + } + + /* + * Used bna_lock to sync reads from bna_stats, which is written + * under the same lock + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bi = 0; + memset(buf, 0, stats->n_stats * sizeof(u64)); + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + /* Fill net_stats into ethtool buffers */ + net_stats = (unsigned long *)&bnad->net_stats; + for (i = 0; i < sizeof(struct net_device_stats) / sizeof(unsigned long); + i++) + buf[bi++] = net_stats[i]; + + /* Fill driver stats into ethtool buffers */ + stats64 = (u64 *)&bnad->stats.drv_stats; + for (i = 0; i < sizeof(struct bnad_drv_stats) / sizeof(u64); i++) + buf[bi++] = stats64[i]; + + /* Fill hardware stats excluding the rxf/txf into ethtool bufs */ + stats64 = (u64 *) bnad->stats.bna_stats->hw_stats; + for (i = 0; + i < offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64); + i++) + buf[bi++] = stats64[i]; + + /* Fill txf stats into ethtool buffers */ + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + hw_stats->txf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_txf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill rxf stats into ethtool buffers */ + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + 
hw_stats->rxf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_rxf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill per Q stats into ethtool buffers */ + bi = bnad_per_q_stats_fill(bnad, buf, bi); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return bnad_get_stats_count_locked(netdev); + default: + return -EOPNOTSUPP; + } +} + +static struct ethtool_ops bnad_ethtool_ops = { + .get_settings = bnad_get_settings, + .set_settings = bnad_set_settings, + .get_drvinfo = bnad_get_drvinfo, + .get_regs_len = bnad_get_regs_len, + .get_regs = bnad_get_regs, + .get_wol = bnad_get_wol, + .get_link = ethtool_op_get_link, + .get_coalesce = bnad_get_coalesce, + .set_coalesce = bnad_set_coalesce, + .get_ringparam = bnad_get_ringparam, + .set_ringparam = bnad_set_ringparam, + .get_pauseparam = bnad_get_pauseparam, + .set_pauseparam = bnad_set_pauseparam, + .get_rx_csum = bnad_get_rx_csum, + .set_rx_csum = bnad_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = bnad_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = bnad_set_tso, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = bnad_get_strings, + .get_ethtool_stats = bnad_get_ethtool_stats, + .get_sset_count = bnad_get_sset_count +}; + +void +bnad_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops); +} diff --git a/drivers/net/bna/cna.h b/drivers/net/bna/cna.h new file mode 100644 index 000000000000..bbd39dc65972 --- /dev/null +++ b/drivers/net/bna/cna.h @@ -0,0 +1,81 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2006-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ + +#ifndef __CNA_H__ +#define __CNA_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define bfa_sm_fault(__mod, __event) do { \ + pr_err("SM Assertion failure: %s: %d: event = %d", __FILE__, __LINE__, \ + __event); \ +} while (0) + +extern char bfa_version[]; + +#define CNA_FW_FILE_CT "ctfw_cna.bin" +#define FC_SYMNAME_MAX 256 /*!< max name server symbolic name size */ + +#pragma pack(1) + +#define MAC_ADDRLEN (6) +typedef struct mac { u8 mac[MAC_ADDRLEN]; } mac_t; + +#pragma pack() + +#define bfa_q_first(_q) ((void *)(((struct list_head *) (_q))->next)) +#define bfa_q_next(_qe) (((struct list_head *) (_qe))->next) +#define bfa_q_prev(_qe) (((struct list_head *) (_qe))->prev) + +/* + * bfa_q_qe_init - to initialize a queue element + */ +#define bfa_q_qe_init(_qe) { \ + bfa_q_next(_qe) = (struct list_head *) NULL; \ + bfa_q_prev(_qe) = (struct list_head *) NULL; \ +} + +/* + * bfa_q_deq - dequeue an element from head of the queue + */ +#define bfa_q_deq(_q, _qe) { \ + if (!list_empty(_q)) { \ + (*((struct list_head **) (_qe))) = bfa_q_next(_q); \ + bfa_q_prev(bfa_q_next(*((struct list_head **) _qe))) = \ + (struct list_head *) (_q); \ + bfa_q_next(_q) = bfa_q_next(*((struct list_head **) _qe)); \ + bfa_q_qe_init(*((struct list_head **) _qe)); \ + } else { \ + *((struct list_head **) (_qe)) = (struct list_head *) NULL; \ + } \ +} + +#endif /* __CNA_H__ */ diff --git a/drivers/net/bna/cna_fwimg.c b/drivers/net/bna/cna_fwimg.c new file mode 100644 index 000000000000..0bd1d3790a27 --- /dev/null +++ b/drivers/net/bna/cna_fwimg.c @@ -0,0 +1,64 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ +#include <linux/firmware.h> +#include "cna.h" + +const struct firmware *bfi_fw; +static u32 *bfi_image_ct_cna; +static u32 bfi_image_ct_cna_size; + +u32 * +cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, + u32 *bfi_image_size, char *fw_name) +{ + const struct firmware *fw; + + if (request_firmware(&fw, fw_name, &pdev->dev)) { + pr_alert("Can't locate firmware %s\n", fw_name); + goto error; + } + + *bfi_image = (u32 *)fw->data; + *bfi_image_size = fw->size/sizeof(u32); + bfi_fw = fw; + + return *bfi_image; +error: + return NULL; +} + +u32 * +cna_get_firmware_buf(struct pci_dev *pdev) +{ + if (bfi_image_ct_cna_size == 0) + cna_read_firmware(pdev, &bfi_image_ct_cna, + &bfi_image_ct_cna_size, CNA_FW_FILE_CT); + return bfi_image_ct_cna; +} + +u32 * +bfa_cb_image_get_chunk(int type, u32 off) +{ + return (u32 *)(bfi_image_ct_cna + off); +} + +u32 +bfa_cb_image_get_size(int type) +{ + return bfi_image_ct_cna_size; +} diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..1f730de0df06 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2189,6 +2189,9 @@ #define PCI_VENDOR_ID_ARIMA 0x161f #define PCI_VENDOR_ID_BROCADE 0x1657 +#define PCI_DEVICE_ID_BROCADE_CT 0x0014 +#define PCI_DEVICE_ID_BROCADE_FC_8G1P 0x0017 +#define PCI_DEVICE_ID_BROCADE_CT_FC 0x0021 #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 -- cgit v1.2.3 From 1726442e115a9e58f40747d009a5b4f303e0840a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 23 Aug 2010 16:26:41 +0000 Subject: net: increase the size of priv_flags and add IFF_OVS_DATAPATH IFF_OVS_DATAPATH is a place-holder for the Open vSwitch datapath which I am preparing to submit for merging. As all 16 bits of priv_flags are already assigned flags, also increase the size of priv_flags to 32 bits. Unfortunately, by my calculations this increases the size of struct net_device by 4 bytes on 32-bit architectures and 8 bytes on 64-bit architectures. I couldn't see an obvious way to avoid that. Cc: Jesse Gross Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 ++ include/linux/netdevice.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 53558ec59e1b..6ed43c1f07ab 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -75,6 +75,8 @@ #define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce2de8b64083..59962dbc2758 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,7 +901,7 @@ struct net_device { unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace.
*/ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ -- cgit v1.2.3 From d187abb9a83e6c6b6e9f2ca17962bdeafb4bc903 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Aug 2010 12:07:13 -0700 Subject: USB: gadget: fix composite kernel-doc warnings Warning(include/linux/usb/composite.h:284): No description found for parameter 'disconnect' Warning(drivers/usb/gadget/composite.c:744): No description found for parameter 'c' Warning(drivers/usb/gadget/composite.c:744): Excess function parameter 'cdev' description in 'usb_string_ids_n' Signed-off-by: Randy Dunlap Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 4 ++-- include/linux/usb/composite.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e483f80822d2..1160c55de7f2 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -723,12 +723,12 @@ int usb_string_ids_tab(struct usb_composite_dev *cdev, struct usb_string *str) /** * usb_string_ids_n() - allocate unused string IDs in batch - * @cdev: the device whose string descriptor IDs are being allocated + * @c: the device whose string descriptor IDs are being allocated * @n: number of string IDs to allocate * Context: single threaded during gadget setup * * Returns the first requested ID. This ID and next @n-1 IDs are now - * valid IDs. At least providind that @n is non zore because if it + * valid IDs. At least provided that @n is non-zero because if it * is, returns last requested ID which is now very useful information. * * @usb_string_ids_n() is called from bind() callbacks to allocate diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 890bc1472190..617068134ae8 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -247,6 +247,7 @@ int usb_add_config(struct usb_composite_dev *, * value; it should return zero on successful initialization. * @unbind: Reverses @bind(); called as a side effect of unregistering * this driver. + * @disconnect: optional driver disconnect method * @suspend: Notifies when the host stops sending USB traffic, * after function notifications * @resume: Notifies configuration when the host restarts USB traffic, -- cgit v1.2.3 From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Aug 2010 14:22:47 +0200 Subject: workqueue: improve destroy_workqueue() debuggability Now that the worklist is global, having works pending after wq destruction can easily lead to oops and destroy_workqueue() have several BUG_ON()s to catch these cases. Unfortunately, BUG_ON() doesn't tell much about how the work became pending after the final flush_workqueue(). This patch adds WQ_DYING which is set before the final flush begins. If a work is requested to be queued on a dying workqueue, WARN_ON_ONCE() is triggered and the request is ignored. This clearly indicates which caller is trying to queue a work on a dying workqueue and keeps the system working in most cases. Locking rule comment is updated such that the 'I' rule includes modifying the field from destruction path. 
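[Editor's note: the sign-off and the actual hunks follow below. As a quick illustration of the ordering this patch relies on, here is a minimal userspace model of the WQ_DYING guard. Every identifier in the sketch (sketch_wq, sketch_queue_work, sketch_destroy_workqueue) is invented for illustration; only the behaviour mirrors the patch -- the queue is marked dying *before* the final flush, so a late queueing attempt is warned about and ignored instead of oopsing on a destroyed workqueue.]

#include <stdbool.h>
#include <stdio.h>

enum { WQ_SKETCH_DYING = 1 << 0 };	/* stands in for WQ_DYING */

struct sketch_wq {
	unsigned int flags;
	int pending;			/* stands in for the real worklist */
};

static bool sketch_queue_work(struct sketch_wq *wq)
{
	if (wq->flags & WQ_SKETCH_DYING) {
		/* the kernel uses WARN_ON_ONCE() here and drops the request */
		fprintf(stderr, "queue_work on dying workqueue ignored\n");
		return false;
	}
	wq->pending++;
	return true;
}

static void sketch_destroy_workqueue(struct sketch_wq *wq)
{
	wq->flags |= WQ_SKETCH_DYING;	/* set before the final flush */
	wq->pending = 0;		/* stands in for flush_workqueue() */
}

int main(void)
{
	struct sketch_wq wq = { 0, 0 };

	sketch_queue_work(&wq);		/* accepted */
	sketch_destroy_workqueue(&wq);
	sketch_queue_work(&wq);		/* rejected with a warning */
	return 0;
}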
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277bcd9a..c959666eafca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,6 +241,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cc3456f96c56..362b50d092e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -87,7 +87,8 @@ enum { /* * Structure fields follow one of the following exclusion rules. * - * I: Set during initialization and read-only afterwards. + * I: Modifiable by initialization/destruction paths and read-only for + * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. @@ -944,6 +945,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); + if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + return; + /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; @@ -2828,6 +2832,7 @@ void destroy_workqueue(struct workqueue_struct *wq) { unsigned int cpu; + wq->flags |= WQ_DYING; flush_workqueue(wq); /* -- cgit v1.2.3 From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 24 Aug 2010 11:44:18 -0700 Subject: guard page for stacks that grow upwards pa-risc and ia64 have stacks that grow upwards. Check that they do not run into other mappings. By making VM_GROWSUP 0x0 on architectures that do not ever use it, we can avoid some unpleasant #ifdefs in check_stack_guard_page(). Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 +++++++- mm/memory.c | 15 +++++++++++---- mm/mmap.c | 3 --- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 709f6728fc90..831c693416b2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -78,7 +78,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) #define VM_GROWSUP 0x00000200 +#else +#define VM_GROWSUP 0x00000000 +#endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ @@ -1330,8 +1334,10 @@ unsigned long ra_submit(struct file_ra_state *ra, /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -#ifdef CONFIG_IA64 +#if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); +#else + #define expand_upwards(vma, address) do { } while (0) #endif extern int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address); diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..6b2ab1051851 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2760,11 +2760,9 @@ out_release: } /* - * This is like a special single-page "expand_downwards()", - * except we must first make sure that 'address-PAGE_SIZE' + * This is like a special single-page "expand_{down|up}wards()", + * except we must first make sure that 'address{-|+}PAGE_SIZE' * doesn't hit another vma. - * - * The "find_vma()" will do the right thing even if we wrap */ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) { @@ -2783,6 +2781,15 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo expand_stack(vma, address - PAGE_SIZE); } + if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { + struct vm_area_struct *next = vma->vm_next; + + /* As VM_GROWSDOWN but s/below/above/ */ + if (next && next->vm_start == address + PAGE_SIZE) + return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; + + expand_upwards(vma, address + PAGE_SIZE); + } return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 331e51af38c9..6128dc8e5ede 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1716,9 +1716,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -#ifndef CONFIG_IA64 -static -#endif int expand_upwards(struct vm_area_struct *vma, unsigned long address) { int error; -- cgit v1.2.3 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 2 +- net/wireless/util.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffbb..2b403c7ee32e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 diff --git a/net/wireless/util.c b/net/wireless/util.c index 0c8a1e8b7690..1eb24162be61 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -221,7 +221,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); -unsigned int ieee80211_hdrlen(__le16 fc) +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; -- cgit v1.2.3 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 93 +++++++++++++++++++++++------ include/net/cfg80211.h | 56 +++++++++++------- net/mac80211/cfg.c | 12 ++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 6 +- net/mac80211/main.c | 37 ++++++++++++ net/mac80211/rx.c | 137 +++++++++++++++++++++++++++++------------- net/mac80211/status.c | 2 +- net/mac80211/util.c | 6 +- net/wireless/core.c | 8 +-- net/wireless/core.h | 21 +++---- net/wireless/mlme.c | 144 +++++++++++++++++++++++++++++---------------- net/wireless/nl80211.c | 108 +++++++++++++++++++++++++--------- net/wireless/nl80211.h | 14 ++--- net/wireless/util.c | 2 +- 15 files changed, 452 insertions(+), 195 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c8701687336..8af1e66c3cf9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -39,6 +39,43 @@ * TODO: need more info? */ +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. 
+ * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -301,16 +338,18 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,8 +359,8 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. 
%NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. @@ -429,9 +468,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -708,7 +750,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -891,6 +942,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -947,7 +1002,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -964,8 +1019,8 @@ enum nl80211_iftype { NL80211_IFTYPE_MESH_POINT, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32e..6a98b1b3bfde 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. 
- * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. 
*/ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f9a317766136..94787d21282c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1521,11 +1521,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } -static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1625,6 +1625,6 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, - .action = ieee80211_action, + .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bf05bfd149d..e73ae51dc036 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -170,6 +170,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) #define IEEE80211_RX_FRAGMENTED BIT(3) +#define IEEE80211_MALFORMED_ACTION_FRM BIT(4) /* only add flags here that do not change with subframes of an aMPDU */ struct ieee80211_rx_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9459aeee0ddc..86f434f234ae 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -177,7 +177,7 @@ static int ieee80211_open(struct net_device *dev) /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* cannot happen */ WARN_ON(1); break; @@ -634,7 +634,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) case NL80211_IFTYPE_MONITOR: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; 
} @@ -886,7 +886,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0afccda42a24..a53feac4618c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -417,6 +417,41 @@ void ieee80211_napi_complete(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_napi_complete); +/* There isn't a lot of sense in it, but you can transmit anything you like */ +static const struct ieee80211_txrx_stypes +ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { + [NL80211_IFTYPE_ADHOC] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_STATION] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, + [NL80211_IFTYPE_AP] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_AP_VLAN] = { + /* copy AP */ + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -446,6 +481,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; + wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | WIPHY_FLAG_4ADDR_STATION; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4fdbed58ca2f..aa41e382bbb3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1939,14 +1939,37 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + + /* + * From here on, look only at management frames. + * Data and control frames are already handled, + * and unknown (reserved) frames are useless. 
+ */ + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + + if (!ieee80211_is_mgmt(mgmt->frame_control)) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + if (ieee80211_drop_unencrypted_mgmt(rx)) + return RX_DROP_UNUSABLE; + + return RX_CONTINUE; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; - struct sk_buff *nskb; - struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1962,9 +1985,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -2055,17 +2075,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; } + return RX_CONTINUE; + invalid: - /* - * For AP mode, hostapd is responsible for handling any action - * frames that we didn't handle, including returning unknown - * ones. For all other modes we will return them to the sender, - * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. - */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return RX_DROP_MONITOR; + rx->flags |= IEEE80211_MALFORMED_ACTION_FRM; + /* will return in the next handlers */ + return RX_CONTINUE; + + handled: + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_rx_status *status; + + /* skip known-bad action frames and return them in the next handler */ + if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM) + return RX_CONTINUE; /* * Getting here means the kernel doesn't know how to handle @@ -2075,10 +2114,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (cfg80211_rx_action(rx->sdata->dev, status->freq, - rx->skb->data, rx->skb->len, - GFP_ATOMIC)) - goto handled; + if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) { + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + } + + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct sk_buff *nskb; + struct ieee80211_sub_if_data *sdata = rx->sdata; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + /* + * For AP mode, hostapd is responsible for handling any action + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by + * 802.11-2007 7.3.1.11. 
+ * Newer versions of hostapd shall also use the management frame + * registration mechanisms, but older ones still use cooked + * monitor interfaces so push all frames there. + */ + if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return RX_DROP_MONITOR; /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) @@ -2097,20 +2170,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_tx_skb(rx->sdata, nskb); } - - handled: - if (rx->sta) - rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; - - queue: - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_packets++; - return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2121,15 +2182,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; __le16 stype; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - - if (rx->skb->len < 24) - return RX_DROP_MONITOR; - - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); if (rxs != RX_CONTINUE) return rxs; @@ -2374,7 +2426,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, if (res != RX_CONTINUE) goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_userspace_mgmt) + CALL_RXH(ieee80211_rx_h_action_return) CALL_RXH(ieee80211_rx_h_mgmt) rxh_next: @@ -2527,7 +2582,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* should never get here */ WARN_ON(1); break; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 10caec5ea8fa..67a35841bef0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) - cfg80211_action_tx_status( + cfg80211_mgmt_tx_status( skb->dev, (unsigned long) skb, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 748387d45bc0..cd2b485fed4f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -471,7 +471,7 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -505,7 +505,7 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1189,7 +1189,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* ignore virtual */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: WARN_ON(1); break; } diff --git a/net/wireless/core.c b/net/wireless/core.c index c70909c3eae4..d52630bbab04 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -433,7 +433,7 @@ int wiphy_register(struct 
wiphy *wiphy) /* sanity check ifmodes */ WARN_ON(!ifmodes); - ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1; if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; @@ -685,8 +685,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); - INIT_LIST_HEAD(&wdev->action_registrations); - spin_lock_init(&wdev->action_registrations_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); @@ -806,7 +806,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; - cfg80211_mlme_purge_actions(wdev); + cfg80211_mlme_purge_registrations(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index 63d57ae399c3..58ab2c791d28 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -331,16 +331,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len); -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie); +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len); +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ee0af32ed59e..8515b1e5c578 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -748,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -struct cfg80211_action_registration { +struct cfg80211_mgmt_registration { struct list_head list; u32 nlpid; int match_len; + __le16 frame_type; + u8 match[]; }; -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len) +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len) { - struct cfg80211_action_registration *reg, *nreg; + struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; + u16 mgmt_type; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; + + if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + return -EINVAL; + + if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) + return -EINVAL; + + mgmt_type = (frame_type & 
IEEE80211_FCTL_STYPE) >> 4; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) + return -EINVAL; nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry(reg, &wdev->action_registrations, list) { + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); + if (frame_type != le16_to_cpu(reg->frame_type)) + continue; + if (memcmp(reg->match, match_data, mlen) == 0) { err = -EALREADY; break; @@ -787,62 +807,75 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; nreg->nlpid = snd_pid; - list_add(&nreg->list, &wdev->action_registrations); + nreg->frame_type = cpu_to_le16(frame_type); + list_add(&nreg->list, &wdev->mgmt_registrations); out: - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return err; } -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlpid == nlpid) { list_del(®->list); kfree(reg); } } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct ieee80211_mgmt *mgmt; + u16 stype; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; - if (rdev->ops->action == NULL) + if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; + if (len < 24 + 1) return -EINVAL; mgmt = (const struct ieee80211_mgmt *) buf; - if (!ieee80211_is_action(mgmt->frame_control)) + + if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; - if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4))) + return -EINVAL; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category != 
WLAN_CATEGORY_PUBLIC) { /* Verify that we are associated with the destination AP */ wdev_lock(wdev); @@ -863,64 +896,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, + channel_type_valid, buf, len, cookie); } -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp) +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_action_registration *reg; - const u8 *action_data; - int action_data_len; + struct cfg80211_mgmt_registration *reg; + const struct ieee80211_txrx_stypes *stypes = + &wiphy->mgmt_stypes[wdev->iftype]; + struct ieee80211_mgmt *mgmt = (void *)buf; + const u8 *data; + int data_len; bool result = false; + __le16 ftype = mgmt->frame_control & + cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); + u16 stype; - /* frame length - min size excluding category */ - action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; - /* action data starts with category */ - action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + if (!(stypes->rx & BIT(stype))) + return false; - spin_lock_bh(&wdev->action_registrations_lock); + data = buf + ieee80211_hdrlen(mgmt->frame_control); + data_len = len - ieee80211_hdrlen(mgmt->frame_control); + + spin_lock_bh(&wdev->mgmt_registrations_lock); + + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { + if (reg->frame_type != ftype) + continue; - list_for_each_entry(reg, &wdev->action_registrations, list) { - if (reg->match_len > action_data_len) + if (reg->match_len > data_len) continue; - if (memcmp(reg->match, action_data, reg->match_len)) + if (memcmp(reg->match, data, reg->match_len)) continue; /* found match! 
*/ /* Indicate the received Action frame to user space */ - if (nl80211_send_action(rdev, dev, reg->nlpid, freq, - buf, len, gfp)) + if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) continue; result = true; break; } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return result; } -EXPORT_SYMBOL(cfg80211_rx_action); +EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); } -EXPORT_SYMBOL(cfg80211_action_tx_status); +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb5b78eebeb2..927ffbd2aebc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -156,6 +156,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, }; /* policy for the attributes */ @@ -437,6 +438,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct ieee80211_rate *rate; int i; u16 ifmodes = dev->wiphy.interface_modes; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -587,7 +590,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(action, ACTION); + CMD(mgmt_tx, FRAME); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -608,6 +611,53 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (mgmt_stypes) { + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); + } + return 
genlmsg_end(msg, hdr); nla_put_failure: @@ -4732,17 +4782,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } -static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; + u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; int err; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) return -EINVAL; - if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) - return -EINVAL; + if (info->attrs[NL80211_ATTR_FRAME_TYPE]) + frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); rtnl_lock(); @@ -4757,12 +4808,13 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) } /* not much point in registering if we can't reply */ - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } - err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, + frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); out: @@ -4773,7 +4825,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; @@ -4796,7 +4848,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } @@ -4839,17 +4891,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) } hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_ACTION); + NL80211_CMD_FRAME); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_action(rdev, dev, chan, channel_type, - channel_type_valid, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - &cookie); + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, + channel_type_valid, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); if (err) goto free_msg; @@ -5348,14 +5400,14 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_REGISTER_ACTION, - .doit = nl80211_register_action, + .cmd = NL80211_CMD_REGISTER_FRAME, + .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_ACTION, - .doit = nl80211_action, + .cmd = NL80211_CMD_FRAME, + .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, @@ -6055,9 +6107,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, - int freq, const u8 *buf, size_t len, gfp_t gfp) +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6067,7 +6119,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME); if 
(!hdr) { nlmsg_free(msg); return -ENOMEM; @@ -6095,10 +6147,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6107,7 +6159,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS); if (!hdr) { nlmsg_free(msg); return; @@ -6194,7 +6246,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) - cfg80211_mlme_unregister_actions(wdev, notify->pid); + cfg80211_mlme_unregister_socket(wdev, notify->pid); rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2ad7fbc7d9f1..30d2f939150d 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, int freq, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); void nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index 1eb24162be61..8d961cc4ae98 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -823,7 +823,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* monitor can't bridge anyway */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* not happening */ break; } -- cgit v1.2.3 From d2730b2a6a019d14455556019d744ab051e6554b Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 16 Aug 2010 22:39:16 +0200 Subject: b43: N-PHY: Implement MAC PHY clock set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gábor Stefanik Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/phy_n.c | 13 ++++++++++++- include/linux/ssb/ssb_regs.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index f1b57070ada9..d212726d509b 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -3074,6 +3074,17 @@ static int b43_nphy_cal_rx_iq(struct b43_wldev *dev, return b43_nphy_rev2_cal_rx_iq(dev, target, type, debug); } +/* http://bcm-v4.sipsolutions.net/802.11/PHY/N/MacPhyClkSet */ +static void b43_nphy_mac_phy_clock_set(struct b43_wldev *dev, bool on) +{ + u32 tmslow = ssb_read32(dev->dev, SSB_TMSLOW); + if (on) + tmslow |= SSB_TMSLOW_PHYCLK; + else + tmslow &= ~SSB_TMSLOW_PHYCLK; + ssb_write32(dev->dev, SSB_TMSLOW, tmslow); +} + /* * Init N-PHY * http://bcm-v4.sipsolutions.net/802.11/PHY/Init/N @@ -3174,7 +3185,7 @@ int b43_phy_initn(struct b43_wldev *dev) b43_phy_write(dev, B43_NPHY_BBCFG, tmp & ~B43_NPHY_BBCFG_RSTCCA); b43_nphy_bmac_clock_fgc(dev, 0); - /* TODO N PHY MAC PHY Clock Set with argument 1 */ + b43_nphy_mac_phy_clock_set(dev, true); b43_nphy_pa_override(dev, false); b43_nphy_force_rf_sequence(dev, B43_RFSEQ_RX2TX); diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a6d5225b9275..11daf9c140e7 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,6 +97,7 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ +#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ -- cgit v1.2.3 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 6f88e184d2e7..52310ac892ea 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -248,7 +248,6 @@ MISSING !Finclude/net/mac80211.h set_key_cmd !Finclude/net/mac80211.h ieee80211_key_conf -!Finclude/net/mac80211.h ieee80211_key_alg !Finclude/net/mac80211.h ieee80211_key_flags diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a0..2a1811366076 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. 
This is useful when the driver has more information than -- cgit v1.2.3 From 2a5fb7b088f8418958775774dda9427d6c73c522 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 17:44:36 +0200 Subject: nl80211: some documentation fixes The nl80211 documentation is currently never generated, so problems have accumulated. Fix most of the trivial ones. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 57 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8af1e66c3cf9..ec1690da7845 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -347,6 +347,8 @@ * four bytes for vendor frames including the OUI. The registration * cannot be dropped, but is removed automatically when the netlink * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This * command is used both as a request to transmit a management frame and * as an event indicating reception of a frame that was not processed in @@ -359,11 +361,14 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -1029,11 +1034,14 @@ enum nl80211_iftype { * Station flags. When a station is added to an AP interface, it is * assumed to be already associated (and hence authenticated.) * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ enum nl80211_sta_flags { __NL80211_STA_FLAG_INVALID, @@ -1146,14 +1154,17 @@ enum nl80211_mpath_flags { * information about a mesh path. 
* * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved - * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_SN: destination sequence number - * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path - * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now - * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in * &enum nl80211_mpath_flags; - * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec - * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defined + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, @@ -1182,6 +1193,8 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, @@ -1202,6 +1215,7 @@ enum nl80211_band_attr { /** * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. @@ -1213,6 +1227,9 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -1232,9 +1249,13 @@ enum nl80211_frequency_attr { /** * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use */ enum nl80211_bitrate_attr { __NL80211_BITRATE_ATTR_INVALID, @@ -1290,6 +1311,7 @@ enum nl80211_reg_type { /** * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional * considerations for a given frequency range. These are the * &enum nl80211_reg_rule_flags. @@ -1306,6 +1328,9 @@ enum nl80211_reg_type { * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for * a given frequency range. The value is in mBm (100 * dBm). 
+ * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use */ enum nl80211_reg_rule_attr { __NL80211_REG_RULE_ATTR_INVALID, @@ -1357,6 +1382,9 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, @@ -1521,6 +1549,7 @@ enum nl80211_channel_type { * enum nl80211_bss - netlink attributes for a BSS * * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) @@ -1564,6 +1593,12 @@ enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. */ enum nl80211_bss_status { NL80211_BSS_STATUS_AUTHENTICATED, @@ -1674,8 +1709,8 @@ enum nl80211_tx_rate_attributes { /** * enum nl80211_band - Frequency band - * @NL80211_BAND_2GHZ - 2.4 GHz ISM band - * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, @@ -1713,9 +1748,9 @@ enum nl80211_attr_cqm { /** * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the * configured threshold - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold */ enum nl80211_cqm_rssi_threshold_event { -- cgit v1.2.3 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- Documentation/DocBook/80211.tmpl | 136 +++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++-- 2 files changed, 246 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 52310ac892ea..b84c9282828f 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -65,18 +65,144 @@ +!Ainclude/net/cfg80211.h The cfg80211 subsystem - -MISSING - +!Pinclude/net/cfg80211.h Introduction - + + Device registration +!Pinclude/net/cfg80211.h Device registration !Finclude/net/cfg80211.h ieee80211_band - +!Finclude/net/cfg80211.h ieee80211_channel_flags +!Finclude/net/cfg80211.h ieee80211_channel +!Finclude/net/cfg80211.h ieee80211_rate_flags +!Finclude/net/cfg80211.h ieee80211_rate +!Finclude/net/cfg80211.h ieee80211_sta_ht_cap +!Finclude/net/cfg80211.h ieee80211_supported_band +!Finclude/net/cfg80211.h cfg80211_signal_type +!Finclude/net/cfg80211.h wiphy_params_flags +!Finclude/net/cfg80211.h wiphy_flags +!Finclude/net/cfg80211.h wiphy +!Finclude/net/cfg80211.h wireless_dev +!Finclude/net/cfg80211.h wiphy_new +!Finclude/net/cfg80211.h wiphy_register +!Finclude/net/cfg80211.h wiphy_unregister +!Finclude/net/cfg80211.h wiphy_free + +!Finclude/net/cfg80211.h wiphy_name +!Finclude/net/cfg80211.h wiphy_dev +!Finclude/net/cfg80211.h wiphy_priv +!Finclude/net/cfg80211.h priv_to_wiphy +!Finclude/net/cfg80211.h set_wiphy_dev +!Finclude/net/cfg80211.h wdev_priv + + + Actions and configuration +!Pinclude/net/cfg80211.h Actions and configuration +!Finclude/net/cfg80211.h cfg80211_ops +!Finclude/net/cfg80211.h vif_params +!Finclude/net/cfg80211.h key_params +!Finclude/net/cfg80211.h survey_info_flags +!Finclude/net/cfg80211.h survey_info +!Finclude/net/cfg80211.h beacon_parameters +!Finclude/net/cfg80211.h plink_actions +!Finclude/net/cfg80211.h station_parameters +!Finclude/net/cfg80211.h station_info_flags +!Finclude/net/cfg80211.h rate_info_flags +!Finclude/net/cfg80211.h rate_info +!Finclude/net/cfg80211.h station_info +!Finclude/net/cfg80211.h monitor_flags +!Finclude/net/cfg80211.h mpath_info_flags +!Finclude/net/cfg80211.h mpath_info +!Finclude/net/cfg80211.h bss_parameters +!Finclude/net/cfg80211.h ieee80211_txq_params +!Finclude/net/cfg80211.h cfg80211_crypto_settings +!Finclude/net/cfg80211.h cfg80211_auth_request +!Finclude/net/cfg80211.h cfg80211_assoc_request +!Finclude/net/cfg80211.h cfg80211_deauth_request +!Finclude/net/cfg80211.h cfg80211_disassoc_request +!Finclude/net/cfg80211.h cfg80211_ibss_params +!Finclude/net/cfg80211.h cfg80211_connect_params +!Finclude/net/cfg80211.h cfg80211_pmksa +!Finclude/net/cfg80211.h cfg80211_send_rx_auth +!Finclude/net/cfg80211.h cfg80211_send_auth_timeout +!Finclude/net/cfg80211.h __cfg80211_auth_canceled +!Finclude/net/cfg80211.h cfg80211_send_rx_assoc +!Finclude/net/cfg80211.h cfg80211_send_assoc_timeout +!Finclude/net/cfg80211.h cfg80211_send_deauth +!Finclude/net/cfg80211.h __cfg80211_send_deauth +!Finclude/net/cfg80211.h cfg80211_send_disassoc +!Finclude/net/cfg80211.h __cfg80211_send_disassoc +!Finclude/net/cfg80211.h cfg80211_ibss_joined +!Finclude/net/cfg80211.h cfg80211_connect_result +!Finclude/net/cfg80211.h cfg80211_roamed +!Finclude/net/cfg80211.h cfg80211_disconnected +!Finclude/net/cfg80211.h cfg80211_ready_on_channel +!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired +!Finclude/net/cfg80211.h cfg80211_new_sta +!Finclude/net/cfg80211.h cfg80211_rx_mgmt 
+!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status +!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify +!Finclude/net/cfg80211.h cfg80211_michael_mic_failure + + + Scanning and BSS list handling +!Pinclude/net/cfg80211.h Scanning and BSS list handling +!Finclude/net/cfg80211.h cfg80211_ssid +!Finclude/net/cfg80211.h cfg80211_scan_request +!Finclude/net/cfg80211.h cfg80211_scan_done +!Finclude/net/cfg80211.h cfg80211_bss +!Finclude/net/cfg80211.h cfg80211_inform_bss_frame +!Finclude/net/cfg80211.h cfg80211_inform_bss +!Finclude/net/cfg80211.h cfg80211_unlink_bss +!Finclude/net/cfg80211.h cfg80211_find_ie +!Finclude/net/cfg80211.h ieee80211_bss_get_ie + + + Utility functions +!Pinclude/net/cfg80211.h Utility functions +!Finclude/net/cfg80211.h ieee80211_channel_to_frequency +!Finclude/net/cfg80211.h ieee80211_frequency_to_channel +!Finclude/net/cfg80211.h ieee80211_get_channel +!Finclude/net/cfg80211.h ieee80211_get_response_rate +!Finclude/net/cfg80211.h ieee80211_hdrlen +!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb +!Finclude/net/cfg80211.h ieee80211_radiotap_iterator + + + Data path helpers +!Pinclude/net/cfg80211.h Data path helpers +!Finclude/net/cfg80211.h ieee80211_data_to_8023 +!Finclude/net/cfg80211.h ieee80211_data_from_8023 +!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s +!Finclude/net/cfg80211.h cfg80211_classify8021d + + + Regulatory enforcement infrastructure +!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure +!Finclude/net/cfg80211.h regulatory_hint +!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory +!Finclude/net/cfg80211.h freq_reg_info + + + RFkill integration +!Pinclude/net/cfg80211.h RFkill integration +!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state +!Finclude/net/cfg80211.h wiphy_rfkill_start_polling +!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling + + + Test mode +!Pinclude/net/cfg80211.h Test mode +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb +!Finclude/net/cfg80211.h cfg80211_testmode_reply +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb +!Finclude/net/cfg80211.h cfg80211_testmode_event + diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfde..f2740537b5d6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. 
Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. 
*/ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3 From 2b8fd9186d9275b07aef43e5bb4e98cd571f9a7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:55:59 +0200 Subject: ACPI/PCI: Do not preserve _OSC control bits returned by a query There is the assumption in acpi_pci_osc_control_set() that it is always sufficient to compare the mask of _OSC control bits to be requested with the result of an _OSC query where all of the known control bits have been checked. However, in general, that need not be the case. For example, if an _OSC feature A depends on an _OSC feature B and control of A, B plus another _OSC feature C is requested simultaneously, the BIOS may return A, B, C, while it would only return C if A and C were requested without B. That may result in passing a wrong mask of _OSC control bits to an _OSC control request, in which case the BIOS may only grant control of a subset of the requested features. Moreover, acpi_pci_run_osc() will return an error code if that happens and the caller of acpi_pci_osc_control_set() will not know that it's been granted control of some _OSC features. Consequently, the system will generally not work as expected. 
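To make the failure mode concrete, here is a minimal self-contained sketch; the FEATURE_* bits and the program itself are invented for illustration and are not real _OSC definitions:

#include <stdbool.h>
#include <stdio.h>

#define FEATURE_A 0x1	/* hypothetical bit; only granted together with B */
#define FEATURE_B 0x2	/* hypothetical bit */
#define FEATURE_C 0x4	/* hypothetical bit */

int main(void)
{
	/* One-time query with all known bits set: the BIOS grants A, B
	 * and C, because B satisfies A's dependency. */
	unsigned int osc_control_qry = FEATURE_A | FEATURE_B | FEATURE_C;

	/* A later request for A and C only: the subset check against the
	 * cached result passes... */
	unsigned int control_req = FEATURE_A | FEATURE_C;
	bool looks_ok = (osc_control_qry & control_req) == control_req;

	/* ...but a fresh query for just A|C would return only C, since A
	 * depends on the unrequested B, so the final control request is
	 * made with a wrong mask. */
	printf("stale subset check passes: %s\n", looks_ok ? "yes" : "no");
	return 0;
}

A fresh query scoped to the actual request, as done by the patch below, avoids trusting the stale mask.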
Apart from this, acpi_pci_osc_control_set() always uses the mask of _OSC control bits returned by the very first invocation of acpi_pci_query_osc(), but that is done with the second argument equal to OSC_PCI_SEGMENT_GROUPS_SUPPORT, which generally happens to affect the returned _OSC control bits. For these reasons, make acpi_pci_osc_control_set() always check if control of the requested _OSC features will be granted before making the final control request. As a result, the osc_control_qry and osc_queried members of struct acpi_pci_root are not necessary any more, so drop them and remove the remaining code referring to them. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 20 +++++++------------- include/acpi/acpi_bus.h | 3 --- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d2ae816df0f5..77cd19697b1e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -249,12 +249,8 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, status = acpi_pci_run_osc(root->device->handle, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - if (control) { + if (control) *control = result; - } else { - root->osc_control_qry = result; - root->osc_queried = 1; - } } return status; } @@ -409,14 +405,12 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) goto out; /* Need to query controls first before requesting them */ - if (!root->osc_queried) { - status = acpi_pci_query_osc(root, root->osc_support_set, NULL); - if (ACPI_FAILURE(status)) - goto out; - } - if ((root->osc_control_qry & control_req) != control_req) { - printk(KERN_DEBUG - "Firmware did not grant requested _OSC control\n"); + flags = control_req; + status = acpi_pci_query_osc(root, root->osc_support_set, &flags); + if (ACPI_FAILURE(status)) + goto out; + + if (flags != control_req) { status = AE_SUPPORT; goto out; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index baacd98e7cc6..4de84ce3a927 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -377,9 +377,6 @@ struct acpi_pci_root { u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ - u32 osc_control_qry; /* the latest _OSC query result */ - - u32 osc_queried:1; /* has _OSC control been queried? */ }; /* helper */ -- cgit v1.2.3 From 75fb60f26befb59dbfa05cb122972642b7bdd219 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:53:11 +0200 Subject: ACPI/PCI: Negotiate _OSC control bits before requesting them It is possible that the BIOS will not grant control of all _OSC features requested via acpi_pci_osc_control_set(), so it is recommended to negotiate the final set of _OSC features with the query flag set before calling _OSC to request control of these features. To implement it, rework acpi_pci_osc_control_set() so that the caller can specify the mask of _OSC control bits to negotiate and the mask of _OSC control bits that are absolutely necessary to it. Then, acpi_pci_osc_control_set() will run _OSC queries in a loop until the mask of _OSC control bits returned by the BIOS is equal to the mask passed to it. Also, before running the _OSC request, acpi_pci_osc_control_set() will check if the caller's required control bits are present in the final mask. 
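As a usage illustration, here is a hedged sketch of the reworked calling convention, modeled on the callers updated in the hunks below; the function name and the error handling are invented for the example and are not part of the patch:

static int example_request_pcie_controls(acpi_handle handle)
{
	u32 flags = OSC_PCI_EXPRESS_PME_CONTROL |
		    OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL;
	acpi_status status;

	/* &flags is both input and output: on entry, the bits to
	 * negotiate; on success, the bits the BIOS actually granted.
	 * The second 'flags' argument is @req -- the bits the caller
	 * cannot do without. */
	status = acpi_pci_osc_control_set(handle, &flags, flags);
	if (ACPI_FAILURE(status))
		return -ENODEV;	/* control of a required bit was refused */

	/* 'flags' now also carries any other bits the BIOS granted. */
	return 0;
}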
Using this mechanism we will be able to avoid situations in which the BIOS doesn't grant control of certain _OSC features, because they depend on some other _OSC features that have not been requested. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 59 +++++++++++++++++++++++------------- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/pcie/aer/aerdrv_acpi.c | 6 ++-- drivers/pci/pcie/pme/pcie_pme_acpi.c | 8 +++-- include/linux/acpi.h | 4 +-- 5 files changed, 49 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 77cd19697b1e..c34713112520 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -374,21 +374,32 @@ out: EXPORT_SYMBOL_GPL(acpi_get_pci_dev); /** - * acpi_pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits + * acpi_pci_osc_control_set - Request control of PCI root _OSC features. + * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). + * @mask: Mask of _OSC bits to request control of, place to store control mask. + * @req: Mask of _OSC bits the control of is essential to the caller. * - * Attempt to take control from Firmware on requested control bits. + * Run _OSC query for @mask and if that is successful, compare the returned + * mask of control bits with @req. If all of the @req bits are set in the + * returned mask, run _OSC request for it. + * + * The variable at the @mask address may be modified regardless of whether or + * not the function returns success. On success it will contain the mask of + * _OSC bits the BIOS has granted control of, but its contents are meaningless + * on failure. **/ -acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req) { + struct acpi_pci_root *root; acpi_status status; - u32 control_req, result, capbuf[3]; + u32 ctrl, capbuf[3]; acpi_handle tmp; - struct acpi_pci_root *root; - control_req = (flags & OSC_PCI_CONTROL_MASKS); - if (!control_req) + if (!mask) + return AE_BAD_PARAMETER; + + ctrl = *mask & OSC_PCI_CONTROL_MASKS; + if ((ctrl & req) != req) return AE_TYPE; root = acpi_pci_find_root(handle); @@ -400,27 +411,33 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) return status; mutex_lock(&osc_lock); + + *mask = ctrl | root->osc_control_set; /* No need to evaluate _OSC if the control was already granted. */ - if ((root->osc_control_set & control_req) == control_req) + if ((root->osc_control_set & ctrl) == ctrl) goto out; - /* Need to query controls first before requesting them */ - flags = control_req; - status = acpi_pci_query_osc(root, root->osc_support_set, &flags); - if (ACPI_FAILURE(status)) - goto out; + /* Need to check the available controls bits before requesting them. 
*/ + while (*mask) { + status = acpi_pci_query_osc(root, root->osc_support_set, mask); + if (ACPI_FAILURE(status)) + goto out; + if (ctrl == *mask) + break; + ctrl = *mask; + } - if (flags != control_req) { + if ((ctrl & req) != req) { status = AE_SUPPORT; goto out; } capbuf[OSC_QUERY_TYPE] = 0; capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; - capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; - status = acpi_pci_run_osc(handle, capbuf, &result); + capbuf[OSC_CONTROL_TYPE] = ctrl; + status = acpi_pci_run_osc(handle, capbuf, mask); if (ACPI_SUCCESS(status)) - root->osc_control_set = result; + root->osc_control_set = *mask; out: mutex_unlock(&osc_lock); return status; @@ -551,8 +568,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (flags != base_flags) acpi_pci_osc_support(root, flags); - status = acpi_pci_osc_control_set(root->device->handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(root->device->handle, &flags, flags); if (ACPI_FAILURE(status)) { printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n"); diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 45fcc1e96df9..3d93d529a7bd 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -360,7 +360,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = acpi_pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_SUCCESS(status)) goto got_one; if (status == AE_SUPPORT) diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index f278d7b0d95d..3a276a0cea93 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -39,9 +39,9 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_AER_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + u32 flags = OSC_PCI_EXPRESS_AER_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(handle, &flags, flags); } if (ACPI_FAILURE(status)) { diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c index 83ab2287ae3f..be20222b12d3 100644 --- a/drivers/pci/pcie/pme/pcie_pme_acpi.c +++ b/drivers/pci/pcie/pme/pcie_pme_acpi.c @@ -28,6 +28,7 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) acpi_status status = AE_NOT_FOUND; struct pci_dev *port = srv->port; acpi_handle handle; + u32 flags; int error = 0; if (acpi_pci_disabled) @@ -39,9 +40,10 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) if (!handle) return -EINVAL; - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_PME_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_PME_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_FAILURE(status)) { dev_info(&port->dev, "Failed to receive control of PCIe PME service: %s\n", diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ccf94dc5acdf..c227757feb06 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); OSC_PCI_EXPRESS_PME_CONTROL | \ 
OSC_PCI_EXPRESS_AER_CONTROL | \ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - -extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, + u32 *mask, u32 req); extern void acpi_early_init(void); #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From 5a46790ca4c40fdb6ed5336d7d6b593c96326b31 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 14:46:53 -0700 Subject: include/linux/if_ether.h: Remove unused #define MAC_FMT Last use was removed, so remove the #define. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index bed7a4682b90..f9c3df03db0f 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -137,8 +137,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" - #endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3 From c2e3143e3c46ede22336316b3ff4746727c0d93a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Aug 2010 14:48:10 -0700 Subject: tc: add meta match on receive hash Trivial extension to existing meta data match rules to allow matching on skb receive hash value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/tc_ematch/tc_em_meta.h | 1 + net/sched/em_meta.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 0864206ec1a3..7138962664f8 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -79,6 +79,7 @@ enum { TCF_META_ID_SK_SENDMSG_OFF, TCF_META_ID_SK_WRITE_PENDING, TCF_META_ID_VLAN_TAG, + TCF_META_ID_RXHASH, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3bcac8aa333c..34da5e29ea1a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen) dst->value = skb->mac_len; } +META_COLLECTOR(int_rxhash) +{ + dst->value = skb_get_rxhash(skb); +} + /************************************************************************** * Netfilter **************************************************************************/ @@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), + [META_ID(RXHASH)] = META_FUNC(int_rxhash), } }; -- cgit v1.2.3 From e7c1c2c46201e46f8ce817196507d2ffd3dafd8e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:18 +0000 Subject: mlx4_en: Added self diagnostics test implementation The selftest includes 5 features: 1. Interrupt test: Executing commands and receiving command completion on all our interrupt vectors. 2. Link test: Verifying we are connected to valid link partner. 3. Speed test: Check that we negotiated link speed correctly. 4. Registers test: Activate HW health check command. 5. Loopback test: Send a packet on loopback interface and catch it on RX side. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. 
Miller --- drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/en_ethtool.c | 79 ++++++++++++------ drivers/net/mlx4/en_netdev.c | 2 +- drivers/net/mlx4/en_port.c | 32 ++++++++ drivers/net/mlx4/en_port.h | 14 ++++ drivers/net/mlx4/en_rx.c | 20 +++++ drivers/net/mlx4/en_selftest.c | 179 +++++++++++++++++++++++++++++++++++++++++ drivers/net/mlx4/en_tx.c | 16 ++++ drivers/net/mlx4/eq.c | 44 ++++++++++ drivers/net/mlx4/fw.c | 3 + drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 19 +++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 + 15 files changed, 388 insertions(+), 27 deletions(-) create mode 100644 drivers/net/mlx4/en_selftest.c (limited to 'include') diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 1fd068e1d930..d1aa45a15854 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -6,4 +6,4 @@ mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \ - en_resources.o en_netdev.o + en_resources.o en_netdev.o en_selftest.o diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index 398d54136967..f7d72d7a8704 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -125,6 +125,14 @@ static const char main_strings[][ETH_GSTRING_LEN] = { #define NUM_MAIN_STATS 21 #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS) +static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { + "Interupt Test", + "Link Test", + "Speed Test", + "Register Test", + "Loopback Test", +}; + static u32 mlx4_en_get_msglevel(struct net_device *dev) { return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; @@ -146,10 +154,15 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); - if (sset != ETH_SS_STATS) + switch (sset) { + case ETH_SS_STATS: + return NUM_ALL_STATS + + (priv->tx_ring_num + priv->rx_ring_num) * 2; + case ETH_SS_TEST: + return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.loopback_support) * 2; + default: return -EOPNOTSUPP; - - return NUM_ALL_STATS + (priv->tx_ring_num + priv->rx_ring_num) * 2; + } } static void mlx4_en_get_ethtool_stats(struct net_device *dev, @@ -181,6 +194,12 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, } +static void mlx4_en_self_test(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + mlx4_en_ex_selftest(dev, &etest->flags, buf); +} + static void mlx4_en_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { @@ -188,30 +207,39 @@ static void mlx4_en_get_strings(struct net_device *dev, int index = 0; int i; - if (stringset != ETH_SS_STATS) - return; - - /* Add main counters */ - for (i = 0; i < NUM_MAIN_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); - for (i = 0; i < NUM_PORT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + switch (stringset) { + case ETH_SS_TEST: + for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + if (priv->mdev->dev->caps.loopback_support) + for (; i < MLX4_EN_NUM_SELF_TEST; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + break; + + case ETH_SS_STATS: + /* Add main counters */ + for (i = 0; i < NUM_MAIN_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); + for (i = 0; i< NUM_PORT_STATS; i++) + 
strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS]); - for (i = 0; i < priv->tx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_bytes", i); - } - for (i = 0; i < priv->rx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_bytes", i); - } - for (i = 0; i < NUM_PKT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + for (i = 0; i < priv->tx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_bytes", i); + } + for (i = 0; i < priv->rx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_bytes", i); + } + for (i = 0; i< NUM_PKT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); + break; + } } static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -439,6 +467,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, + .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 34cfa3cfbdae..968e75b7acf4 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -109,7 +109,7 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) mutex_unlock(&mdev->state_lock); } -static u64 mlx4_en_mac_to_u64(u8 *addr) +u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c index a29abe845d2e..aa3ef2aee5bf 100644 --- a/drivers/net/mlx4/en_port.c +++ b/drivers/net/mlx4/en_port.c @@ -142,6 +142,38 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, return err; } +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) +{ + struct mlx4_en_query_port_context *qport_context; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct mlx4_en_port_state *state = &priv->port_state; + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, sizeof(*qport_context)); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + qport_context = mailbox->buf; + + /* This command is always accessed from Ethtool context + * already synchronized, no need in locking */ + state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); + if ((qport_context->link_speed & MLX4_EN_SPEED_MASK) == + MLX4_EN_1G_SPEED) + state->link_speed = 1000; + else + state->link_speed = 10000; + state->transciver = qport_context->transceiver; + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h index e6477f12beb5..f6511aa2b7df 100644 --- a/drivers/net/mlx4/en_port.h +++ b/drivers/net/mlx4/en_port.h @@ -84,6 +84,20 @@ enum { MLX4_MCAST_ENABLE = 2, }; +struct mlx4_en_query_port_context { + u8 link_up; +#define 
MLX4_EN_LINK_UP_MASK 0x80 + u8 reserved; + __be16 mtu; + u8 reserved2; + u8 link_speed; +#define MLX4_EN_SPEED_MASK 0x3 +#define MLX4_EN_1G_SPEED 0x2 + u16 reserved3[5]; + __be64 mac; + u8 transceiver; +}; + struct mlx4_en_stat_out_mbox { /* Received frames with a length of 64 octets */ diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 7a5123c4a366..f421a3d42723 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -541,6 +541,21 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, return skb; } +static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +{ + int i; + int offset = ETH_HLEN; + + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { + if (*(skb->data + offset) != (unsigned char) (i & 0xff)) + goto out_loopback; + } + /* Loopback found */ + priv->loopback_ok = 1; + +out_loopback: + dev_kfree_skb_any(skb); +} int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { @@ -655,6 +670,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, skb); + goto next; + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); diff --git a/drivers/net/mlx4/en_selftest.c b/drivers/net/mlx4/en_selftest.c new file mode 100644 index 000000000000..43357d35616a --- /dev/null +++ b/drivers/net/mlx4/en_selftest.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include + +#include "mlx4_en.h" + + +static int mlx4_en_test_registers(struct mlx4_en_priv *priv) +{ + return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) +{ + struct sk_buff *skb; + struct ethhdr *ethh; + unsigned char *packet; + unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; + unsigned int i; + int err; + + + /* build the pkt before xmit */ + skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); + if (!skb) { + en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); + return -ENOMEM; + } + skb_reserve(skb, NET_IP_ALIGN); + + ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); + packet = (unsigned char *)skb_put(skb, packet_size); + memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); + memset(ethh->h_source, 0, ETH_ALEN); + ethh->h_proto = htons(ETH_P_ARP); + skb_set_mac_header(skb, 0); + for (i = 0; i < packet_size; ++i) /* fill our packet */ + packet[i] = (unsigned char)(i & 0xff); + + /* xmit the pkt */ + err = mlx4_en_xmit(skb, priv->dev); + return err; +} + +static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) +{ + u32 loopback_ok = 0; + int i; + + + priv->loopback_ok = 0; + priv->validate_loopback = 1; + + /* xmit */ + if (mlx4_en_test_loopback_xmit(priv)) { + en_err(priv, "Transmitting loopback packet failed\n"); + goto mlx4_en_test_loopback_exit; + } + + /* polling for result */ + for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) { + msleep(MLX4_EN_LOOPBACK_TIMEOUT); + if (priv->loopback_ok) { + loopback_ok = 1; + break; + } + } + if (!loopback_ok) + en_err(priv, "Loopback packet didn't arrive\n"); + +mlx4_en_test_loopback_exit: + + priv->validate_loopback = 0; + return (!loopback_ok); +} + + +static int mlx4_en_test_link(struct mlx4_en_priv *priv) +{ + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + if (priv->port_state.link_state == 1) + return 0; + else + return 1; +} + +static int mlx4_en_test_speed(struct mlx4_en_priv *priv) +{ + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + /* The device currently only supports 10G speed */ + if (priv->port_state.link_speed != SPEED_10000) + return priv->port_state.link_speed; + return 0; +} + + +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *tx_ring; + int i, carrier_ok; + + memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); + + if (*flags & ETH_TEST_FL_OFFLINE) { + /* disable the interface */ + carrier_ok = netif_carrier_ok(dev); + + netif_carrier_off(dev); +retry_tx: + /* Wait untill all tx queues are empty. 
+ * there should not be any additional incoming traffic + * since we turned the carrier off */ + msleep(200); + for (i = 0; i < priv->tx_ring_num && carrier_ok; i++) { + tx_ring = &priv->tx_ring[i]; + if (tx_ring->prod != (tx_ring->cons + tx_ring->last_nr_txbb)) + goto retry_tx; + } + + if (priv->mdev->dev->caps.loopback_support){ + buf[3] = mlx4_en_test_registers(priv); + buf[4] = mlx4_en_test_loopback(priv); + } + + if (carrier_ok) + netif_carrier_on(dev); + + } + buf[0] = mlx4_test_interrupts(mdev->dev); + buf[1] = mlx4_en_test_link(priv); + buf[2] = mlx4_en_test_speed(priv); + + for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) { + if (buf[i]) + *flags |= ETH_TEST_FL_FAILED; + } +} diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 3deabd1d0357..b875f9c38848 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -600,6 +600,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx4_wqe_data_seg *data; struct skb_frag_struct *frag; struct mlx4_en_tx_info *tx_info; + struct ethhdr *ethh; + u64 mac; + u32 mac_l, mac_h; int tx_ind = 0; int nr_txbb; int desc_size; @@ -679,6 +682,19 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) priv->port_stats.tx_chksum_offload++; } + if (unlikely(priv->validate_loopback)) { + /* Copy dst mac address to wqe */ + skb_reset_mac_header(skb); + ethh = eth_hdr(skb); + if (ethh && ethh->h_dest) { + mac = mlx4_en_mac_to_u64(ethh->h_dest); + mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); + mac_l = (u32) (mac & 0xffffffff); + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); + tx_desc->ctrl.imm = cpu_to_be32(mac_l); + } + } + /* Handle LSO (TSO) packets */ if (lso_header_size) { /* Mark opcode as LSO */ diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 6d7b2bf210ce..552d0fce6f67 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -699,3 +699,47 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) kfree(priv->eq_table.uar_map); } + +/* A test that verifies that we can accept interrupts on all + * the irq vectors of the device. + * Interrupts are checked using the NOP command. 
+ */ +int mlx4_test_interrupts(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + int err; + + err = mlx4_NOP(dev); + /* When not in MSI_X, there is only one irq to check */ + if (!(dev->flags & MLX4_FLAG_MSI_X)) + return err; + + /* A loop over all completion vectors, for each vector we will check + * whether it works by mapping command completions to that vector + * and performing a NOP command + */ + for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); + + /* Map the new eq to handle all asyncronous events */ + err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[i].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + mlx4_cmd_use_events(dev); + break; + } + + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + } + + /* Return to default */ + mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + return err; +} +EXPORT_SYMBOL(mlx4_test_interrupts); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 04f42ae1eda0..a87bf3c97055 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -178,6 +178,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -268,6 +269,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); + dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 526d7f30c041..2cc1ba5452e3 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -74,6 +74,7 @@ struct mlx4_dev_cap { u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int loopback_support; u32 flags; int reserved_uars; int uar_size; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 5102ab1ac561..7390cdb19414 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -221,6 +221,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.log_num_macs = log_num_mac; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index edf6523702c0..a09598b2b12e 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -45,6 +45,7 @@ #include #include #include +#include #include "en_port.h" @@ -139,10 +140,14 @@ enum { #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) +#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 
0xffffffffffffULL +#define MLX4_EN_LOOPBACK_RETRIES 5 +#define MLX4_EN_LOOPBACK_TIMEOUT 100 + #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 @@ -356,6 +361,12 @@ struct mlx4_en_rss_context { __be32 rss_key[10]; }; +struct mlx4_en_port_state { + int link_state; + int link_speed; + int transciver; +}; + struct mlx4_en_pkt_stats { unsigned long broadcast; unsigned long rx_prio[8]; @@ -404,6 +415,7 @@ struct mlx4_en_priv { struct vlan_group *vlgrp; struct net_device_stats stats; struct net_device_stats ret_stats; + struct mlx4_en_port_state port_state; spinlock_t stats_lock; unsigned long last_moder_packets; @@ -422,6 +434,8 @@ struct mlx4_en_priv { u16 sample_interval; u16 adaptive_rx_coal; u32 msg_enable; + u32 loopback_ok; + u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; @@ -530,6 +544,11 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); + +#define MLX4_EN_NUM_SELF_TEST 5 +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); +u64 mlx4_en_mac_to_u64(u8 *addr); /* * Globals diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..78a1b9671752 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -56,6 +56,7 @@ enum { MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, + MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..2cec58722738 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -229,6 +229,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; @@ -480,5 +481,6 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); +int mlx4_test_interrupts(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 7699517db435fd24143bd32dd644275e3eeb4c86 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:23 +0000 Subject: mlx4_en: Fixing report in Ethtool get_settings The report is now based on a query from FW, giving the correct transceiver type and link speed. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S.
Miller --- drivers/net/mlx4/en_ethtool.c | 27 +++++++++++++++++++++++++-- drivers/net/mlx4/fw.c | 9 +++++++++ drivers/net/mlx4/fw.h | 4 ++++ drivers/net/mlx4/main.c | 4 ++++ include/linux/mlx4/device.h | 4 ++++ 5 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index f7d72d7a8704..fa2f2f43ab48 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -244,16 +244,39 @@ static void mlx4_en_get_strings(struct net_device *dev, static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct mlx4_en_priv *priv = netdev_priv(dev); + int trans_type; + cmd->autoneg = AUTONEG_DISABLE; cmd->supported = SUPPORTED_10000baseT_Full; - cmd->advertising = ADVERTISED_1000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + trans_type = priv->port_state.transciver; if (netif_carrier_ok(dev)) { - cmd->speed = SPEED_10000; + cmd->speed = priv->port_state.link_speed; cmd->duplex = DUPLEX_FULL; } else { cmd->speed = -1; cmd->duplex = -1; } + + if (trans_type > 0 && trans_type <= 0xC) { + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_EXTERNAL; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + } else if (trans_type == 0x80 || trans_type == 0) { + cmd->port = PORT_TP; + cmd->transceiver = XCVR_INTERNAL; + cmd->supported |= SUPPORTED_TP; + cmd->advertising |= ADVERTISED_TP; + } else { + cmd->port = -1; + cmd->transceiver = -1; + } return 0; } diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index a87bf3c97055..515c6348f32b 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -141,6 +141,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; + u32 field32; u16 size; u16 stat_rate; int err; @@ -368,6 +369,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, @@ -391,6 +395,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->log_max_vlans[i] = field >> 4; MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + dev_cap->trans_type[i] = field32 >> 24; + dev_cap->vendor_oui[i] = field32 & 0xffffff; + MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); + MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); } } diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 2cc1ba5452e3..443e456c9dab 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -73,6 +73,10 @@ struct mlx4_dev_cap { int max_pkeys[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + u16 wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; int loopback_support; u32 flags; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 
7390cdb19414..f4791fa2472f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -184,6 +184,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; + dev->caps.trans_type[i] = dev_cap->trans_type[i]; + dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; + dev->caps.wavelength[i] = dev_cap->wavelength[i]; + dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2cec58722738..2a36a344fb3d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,6 +186,10 @@ struct mlx4_caps { int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; int bf_reg_size; -- cgit v1.2.3 From 0533943c5c45cce2e26432bf0a6b8e114757c897 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:42 +0000 Subject: mlx4_en: UDP RSS support Adding capability for RSS for UDP traffic, hashing is done based on IP addresses and UDP port number. The support depends on HW/FW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_main.c | 19 ++++++++++++------- drivers/net/mlx4/en_rx.c | 8 ++++---- drivers/net/mlx4/fw.c | 3 +++ drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 3 +++ include/linux/mlx4/device.h | 1 + 7 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index cacac4e966ee..143906417048 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -63,11 +63,12 @@ static const char mlx4_en_version[] = */ -/* Use a XOR rathern than Toeplitz hash function for RSS */ -MLX4_EN_PARM_INT(rss_xor, 0, "Use XOR hash function for RSS"); - -/* RSS hash type mask - default to */ -MLX4_EN_PARM_INT(rss_mask, 0xf, "RSS hash type bitmask"); +/* Enable RSS TCP traffic */ +MLX4_EN_PARM_INT(tcp_rss, 1, + "Enable RSS for incomming TCP traffic or disabled (0)"); +/* Enable RSS UDP traffic */ +MLX4_EN_PARM_INT(udp_rss, 1, + "Enable RSS for incomming UDP traffic or disabled (0)"); /* Priority pausing */ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." 
@@ -103,8 +104,12 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) struct mlx4_en_profile *params = &mdev->profile; int i; - params->rss_xor = (rss_xor != 0); - params->rss_mask = rss_mask & 0x1f; + params->tcp_rss = tcp_rss; + params->udp_rss = udp_rss; + if (params->udp_rss && !mdev->dev->caps.udp_rss) { + mlx4_warn(mdev, "UDP RSS is not supported on this device.\n"); + params->udp_rss = 0; + } for (i = 1; i <= MLX4_MAX_PORTS; i++) { params->prof[i].rx_pause = 1; params->prof[i].rx_ppp = pfcrx; diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index f421a3d42723..e2126c76d1dc 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -859,8 +859,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) struct mlx4_qp_context context; struct mlx4_en_rss_context *rss_context; void *ptr; - int rss_xor = mdev->profile.rss_xor; - u8 rss_mask = mdev->profile.rss_mask; + u8 rss_mask = 0x3f; int i, qpn; int err = 0; int good_qps = 0; @@ -906,9 +905,10 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); - rss_context->hash_fn = rss_xor & 0x3; - rss_context->flags = rss_mask << 2; + rss_context->flags = rss_mask; + if (priv->mdev->profile.udp_rss) + rss_context->base_qpn_udp = rss_context->default_qpn; err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 515c6348f32b..b716e1a1b298 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -179,6 +179,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_UDP_RSS_OFFSET 0x42 #define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 @@ -270,6 +271,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET); + dev_cap->udp_rss = field & 0x1; MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 443e456c9dab..65cc72eb899d 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -78,6 +78,7 @@ struct mlx4_dev_cap { u16 wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int udp_rss; int loopback_support; u32 flags; int reserved_uars; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index f4791fa2472f..569fa3df381f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -225,6 +225,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.udp_rss = dev_cap->udp_rss; dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; diff --git a/drivers/net/mlx4/mlx4_en.h 
b/drivers/net/mlx4/mlx4_en.h index 5d8f097d7e06..4036a053ee32 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -318,6 +318,8 @@ struct mlx4_en_port_profile { struct mlx4_en_profile { int rss_xor; + int tcp_rss; + int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; @@ -360,6 +362,7 @@ struct mlx4_en_rss_context { u8 hash_fn; u8 flags; __be32 rss_key[10]; + __be32 base_qpn_udp; }; struct mlx4_en_port_state { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2a36a344fb3d..7338654c02b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -233,6 +233,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int udp_rss; int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; -- cgit v1.2.3 From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Aug 2010 10:33:56 +0200 Subject: workqueue: fix cwq->nr_active underflow cwq->nr_active is used to keep track of how many work items are active for the cpu workqueue, where 'active' is defined as either pending on global worklist or executing. This is used to implement the max_active limit and workqueue freezing. If a work item is queued after nr_active has already reached max_active, the work item doesn't increment nr_active and is put on the delayed queue and gets activated later as previous active work items retire. try_to_grab_pending(), which is used in the cancellation path, unconditionally decremented nr_active whether the work item being cancelled is currently active or delayed, so cancelling a delayed work item makes nr_active underflow. This breaks max_active enforcement and triggers BUG_ON() in destroy_workqueue() later on. This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which is set while a work item is on the delayed list, and by making try_to_grab_pending() decrement nr_active iff the work item is currently active. The addition of the flag enlarges cwq alignment to 256 bytes which is getting a bit too large. It's scheduled to be reduced back to 128 bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next devel cycle.
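The accounting bug is easy to reproduce in a stand-alone toy model (illustrative user-space sketch with a hypothetical two-item queue, not the kernel code):

	#include <stdio.h>
	#include <stdbool.h>

	#define MAX_ACTIVE 1

	struct toy_cwq {
		int nr_active;
		bool delayed[2];	/* per work item: is it on the delayed list? */
	};

	static void toy_queue(struct toy_cwq *cwq, int item)
	{
		if (cwq->nr_active < MAX_ACTIVE) {
			cwq->nr_active++;		/* goes on the worklist */
			cwq->delayed[item] = false;
		} else {
			cwq->delayed[item] = true;	/* over max_active: delayed */
		}
	}

	/* Old behaviour: cancellation decremented unconditionally. */
	static void toy_cancel_old(struct toy_cwq *cwq, int item)
	{
		(void)item;
		cwq->nr_active--;
	}

	/* Patched behaviour: only items that were actually active count. */
	static void toy_cancel_fixed(struct toy_cwq *cwq, int item)
	{
		if (!cwq->delayed[item])
			cwq->nr_active--;
	}

	int main(void)
	{
		struct toy_cwq cwq = { 0, { false, false } };

		toy_queue(&cwq, 0);		/* active, nr_active = 1 */
		toy_queue(&cwq, 1);		/* delayed, nr_active stays 1 */

		toy_cancel_old(&cwq, 1);	/* old code also counts the delayed item */
		cwq.nr_active--;		/* item 0 completes */
		printf("old: nr_active = %d (underflow)\n", cwq.nr_active);	/* -1 */

		cwq.nr_active = 1;		/* reset: item 0 active again */
		toy_cancel_fixed(&cwq, 1);	/* delayed item is not accounted */
		cwq.nr_active--;		/* item 0 completes */
		printf("fixed: nr_active = %d\n", cwq.nr_active);		/* 0 */
		return 0;
	}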
Signed-off-by: Tejun Heo Reported-by: Johannes Berg --- include/linux/workqueue.h | 16 +++++++++------- kernel/workqueue.c | 30 ++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c959666eafca..f11100f96482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. 
*/ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 362b50d092e2..a2dccfca03ba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -941,6 +941,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; + unsigned int work_flags; unsigned long flags; debug_work_activate(work); @@ -990,14 +991,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else + } else { + work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; + } - insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + insert_work(cwq, work, worklist, work_flags); spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -1666,6 +1670,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); move_linked_works(work, pos, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1673,6 +1678,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue + * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1680,19 +1686,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; cwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } } /* is flush in progress and are we at the flushing tip? */ @@ -1823,7 +1832,7 @@ __acquires(&gcwq->lock) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + cwq_dec_nr_in_flight(cwq, work_color, false); } /** @@ -2388,7 +2397,8 @@ static int try_to_grab_pending(struct work_struct *work) debug_work_deactivate(work); list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); ret = 1; } } -- cgit v1.2.3 From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Aug 2010 02:27:49 -0700 Subject: tcp: Combat per-cpu skew in orphan tests. 
As reported by Anton Blanchard when we use percpu_counter_read_positive() to make our orphan socket limit checks, the check can be off by up to num_cpus_online() * batch (which is 32 by default) which on a 128 cpu machine can be as large as the default orphan limit itself. Fix this by doing the full expensive sum check if the optimized check triggers. Reported-by: Anton Blanchard Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- include/net/tcp.h | 18 ++++++++++++++---- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_timer.c | 8 ++++---- 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..eaa9582779d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..197b9b77fa3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2011,11 +2011,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); -- cgit v1.2.3 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a1811366076..dcc8c2bf986e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3 From 4c5f7d7a1e6cf20ad515dad8a63c0813fac5bcea Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Aug 2010 22:46:28 +0300 Subject: wl12xx: change contact person for the include file Luciano should be the contact person for the include/linux/spi/wl12xx.h file. Signed-off-by: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/spi/wl12xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h index a223ecbc71ef..a20bccf0b5c2 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/spi/wl12xx.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009 Nokia Corporation * - * Contact: Kalle Valo + * Contact: Luciano Coelho * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From ad01b7d480a4a135f974afd5c617c417e0b0542f Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 23 Aug 2010 20:40:42 +0000 Subject: stmmac: make ioaddr 'void __iomem *' rather than unsigned long This avoids unnecessary casting and adds the ioaddr to the private structure, as the before/after sketch below illustrates.
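The pattern, modeled loosely on the dwmac1000_core_init() hunks that follow (GMAC_CONTROL and GMAC_CORE_INIT are the register offset and init bits used there; the _old/_new names are illustrative):

	/* Before: the MMIO base travels as an integer, so every access needs
	 * a cast (or triggers "makes pointer from integer" warnings): */
	static void core_init_old(unsigned long ioaddr)
	{
		u32 value = readl((void __iomem *)(ioaddr + GMAC_CONTROL));

		writel(value | GMAC_CORE_INIT, (void __iomem *)(ioaddr + GMAC_CONTROL));
	}

	/* After: the pointer is typed correctly, sparse can check the I/O
	 * address space, and no casts are needed: */
	static void core_init_new(void __iomem *ioaddr)
	{
		u32 value = readl(ioaddr + GMAC_CONTROL);

		writel(value | GMAC_CORE_INIT, ioaddr + GMAC_CONTROL);
	}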
This patch also removes many warnings when compiling the driver. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/common.h | 50 +++++++-------- drivers/net/stmmac/dwmac1000_core.c | 18 +++--- drivers/net/stmmac/dwmac1000_dma.c | 8 +-- drivers/net/stmmac/dwmac100_core.c | 20 +++--- drivers/net/stmmac/dwmac100_dma.c | 8 +-- drivers/net/stmmac/dwmac_dma.h | 16 ++--- drivers/net/stmmac/dwmac_lib.c | 22 +++---- drivers/net/stmmac/enh_desc.c | 2 +- drivers/net/stmmac/norm_desc.c | 2 +- drivers/net/stmmac/stmmac.h | 3 +- drivers/net/stmmac/stmmac_ethtool.c | 21 +++--- drivers/net/stmmac/stmmac_main.c | 124 +++++++++++++++++------------------- drivers/net/stmmac/stmmac_mdio.c | 21 +++--- include/linux/stmmac.h | 2 +- 14 files changed, 153 insertions(+), 164 deletions(-) (limited to 'include') diff --git a/drivers/net/stmmac/common.h b/drivers/net/stmmac/common.h index 66b9da0260fe..e8cbcb5c206e 100644 --- a/drivers/net/stmmac/common.h +++ b/drivers/net/stmmac/common.h @@ -167,7 +167,7 @@ struct stmmac_desc_ops { int (*get_tx_ls) (struct dma_desc *p); /* Return the transmit status looking at the TDES1 */ int (*tx_status) (void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr); + struct dma_desc *p, void __iomem *ioaddr); /* Get the buffer size from the descriptor */ int (*get_tx_len) (struct dma_desc *p); /* Handle extra events on specific interrupts hw dependent */ @@ -182,44 +182,44 @@ struct stmmac_dma_ops { /* DMA core initialization */ - int (*init) (unsigned long ioaddr, int pbl, u32 dma_tx, u32 dma_rx); + int (*init) (void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx); /* Dump DMA registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register * An invalid value enables the store-and-forward mode */ - void (*dma_mode) (unsigned long ioaddr, int txmode, int rxmode); + void (*dma_mode) (void __iomem *ioaddr, int txmode, int rxmode); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr); - void (*enable_dma_transmission) (unsigned long ioaddr); - void (*enable_dma_irq) (unsigned long ioaddr); - void (*disable_dma_irq) (unsigned long ioaddr); - void (*start_tx) (unsigned long ioaddr); - void (*stop_tx) (unsigned long ioaddr); - void (*start_rx) (unsigned long ioaddr); - void (*stop_rx) (unsigned long ioaddr); - int (*dma_interrupt) (unsigned long ioaddr, + void __iomem *ioaddr); + void (*enable_dma_transmission) (void __iomem *ioaddr); + void (*enable_dma_irq) (void __iomem *ioaddr); + void (*disable_dma_irq) (void __iomem *ioaddr); + void (*start_tx) (void __iomem *ioaddr); + void (*stop_tx) (void __iomem *ioaddr); + void (*start_rx) (void __iomem *ioaddr); + void (*stop_rx) (void __iomem *ioaddr); + int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x); }; struct stmmac_ops { /* MAC core initialization */ - void (*core_init) (unsigned long ioaddr) ____cacheline_aligned; + void (*core_init) (void __iomem *ioaddr) ____cacheline_aligned; /* Dump MAC registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Handle extra events on specific interrupts hw dependent */ - void (*host_irq_status) (unsigned long ioaddr); + void (*host_irq_status) (void __iomem *ioaddr); /* Multicast filter setting */ void (*set_filter) (struct net_device *dev); /* Flow control setting */ - void
(*flow_ctrl) (unsigned long ioaddr, unsigned int duplex, + void (*flow_ctrl) (void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time); /* Set power management mode (e.g. magic frame) */ - void (*pmt) (unsigned long ioaddr, unsigned long mode); + void (*pmt) (void __iomem *ioaddr, unsigned long mode); /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*set_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); - void (*get_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*get_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); }; @@ -243,11 +243,11 @@ struct mac_device_info { struct mac_link link; }; -struct mac_device_info *dwmac1000_setup(unsigned long addr); -struct mac_device_info *dwmac100_setup(unsigned long addr); +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr); +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr); -extern void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +extern void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); -extern void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +extern void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low); -extern void dwmac_dma_flush_tx_fifo(unsigned long ioaddr); +extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c index 2b2f5c8caf1c..8bbfc0f48dea 100644 --- a/drivers/net/stmmac/dwmac1000_core.c +++ b/drivers/net/stmmac/dwmac1000_core.c @@ -30,7 +30,7 @@ #include #include "dwmac1000.h" -static void dwmac1000_core_init(unsigned long ioaddr) +static void dwmac1000_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + GMAC_CONTROL); value |= GMAC_CORE_INIT; @@ -50,7 +50,7 @@ static void dwmac1000_core_init(unsigned long ioaddr) #endif } -static void dwmac1000_dump_regs(unsigned long ioaddr) +static void dwmac1000_dump_regs(void __iomem *ioaddr) { int i; pr_info("\tDWMAC1000 regs (base addr = 0x%8x)\n", (unsigned int)ioaddr); @@ -62,14 +62,14 @@ static void dwmac1000_dump_regs(unsigned long ioaddr) } } -static void dwmac1000_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), GMAC_ADDR_LOW(reg_n)); } -static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), @@ -78,7 +78,7 @@ static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac1000_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; unsigned int value = 0; CHIP_DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", @@ -139,7 +139,7 @@ static void dwmac1000_set_filter(struct net_device *dev) readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); } -static void dwmac1000_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac1000_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = 0; @@ -162,7 +162,7 @@ static void dwmac1000_flow_ctrl(unsigned long ioaddr, 
unsigned int duplex, writel(flow, ioaddr + GMAC_FLOW_CTRL); } -static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode) { unsigned int pmt = 0; @@ -178,7 +178,7 @@ static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) } -static void dwmac1000_irq_status(unsigned long ioaddr) +static void dwmac1000_irq_status(void __iomem *ioaddr) { u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); @@ -211,7 +211,7 @@ struct stmmac_ops dwmac1000_ops = { .get_umac_addr = dwmac1000_get_umac_addr, }; -struct mac_device_info *dwmac1000_setup(unsigned long ioaddr) +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) { struct mac_device_info *mac; u32 uid = readl(ioaddr + GMAC_VERSION); diff --git a/drivers/net/stmmac/dwmac1000_dma.c b/drivers/net/stmmac/dwmac1000_dma.c index 415805057cb0..2ef5a56370e9 100644 --- a/drivers/net/stmmac/dwmac1000_dma.c +++ b/drivers/net/stmmac/dwmac1000_dma.c @@ -29,7 +29,7 @@ #include "dwmac1000.h" #include "dwmac_dma.h" -static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, return 0; } -static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -111,12 +111,12 @@ static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, /* Not yet implemented --- no RMON module */ static void dwmac1000_dma_diagnostic_fr(void *data, - struct stmmac_extra_stats *x, unsigned long ioaddr) + struct stmmac_extra_stats *x, void __iomem *ioaddr) { return; } -static void dwmac1000_dump_dma_regs(unsigned long ioaddr) +static void dwmac1000_dump_dma_regs(void __iomem *ioaddr) { int i; pr_info(" DMA registers\n"); diff --git a/drivers/net/stmmac/dwmac100_core.c b/drivers/net/stmmac/dwmac100_core.c index 2fb165fa2ba0..135a8082816e 100644 --- a/drivers/net/stmmac/dwmac100_core.c +++ b/drivers/net/stmmac/dwmac100_core.c @@ -31,7 +31,7 @@ #include #include "dwmac100.h" -static void dwmac100_core_init(unsigned long ioaddr) +static void dwmac100_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CONTROL); @@ -42,12 +42,12 @@ static void dwmac100_core_init(unsigned long ioaddr) #endif } -static void dwmac100_dump_mac_regs(unsigned long ioaddr) +static void dwmac100_dump_mac_regs(void __iomem *ioaddr) { pr_info("\t----------------------------------------------\n" "\t DWMAC 100 CSR (base addr = 0x%8x)\n" "\t----------------------------------------------\n", - (unsigned int)ioaddr); + (unsigned int) ioaddr); pr_info("\tcontrol reg (offset 0x%x): 0x%08x\n", MAC_CONTROL, readl(ioaddr + MAC_CONTROL)); pr_info("\taddr HI (offset 0x%x): 0x%08x\n ", MAC_ADDR_HIGH, @@ -77,18 +77,18 @@ static void dwmac100_dump_mac_regs(unsigned long ioaddr) MMC_LOW_INTR_MASK, readl(ioaddr + MMC_LOW_INTR_MASK)); } -static void dwmac100_irq_status(unsigned long ioaddr) +static void dwmac100_irq_status(void __iomem *ioaddr) { return; } -static void dwmac100_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac100_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); } -static void dwmac100_get_umac_addr(unsigned long 
ioaddr, unsigned char *addr, +static void dwmac100_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); @@ -96,7 +96,7 @@ static void dwmac100_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac100_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; u32 value = readl(ioaddr + MAC_CONTROL); if (dev->flags & IFF_PROMISC) { @@ -145,7 +145,7 @@ static void dwmac100_set_filter(struct net_device *dev) readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); } -static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac100_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = MAC_FLOW_CTRL_ENABLE; @@ -158,7 +158,7 @@ static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, /* No PMT module supported for this Ethernet Controller. * Tested on ST platforms only. */ -static void dwmac100_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac100_pmt(void __iomem *ioaddr, unsigned long mode) { return; } @@ -174,7 +174,7 @@ struct stmmac_ops dwmac100_ops = { .get_umac_addr = dwmac100_get_umac_addr, }; -struct mac_device_info *dwmac100_setup(unsigned long ioaddr) +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr) { struct mac_device_info *mac; diff --git a/drivers/net/stmmac/dwmac100_dma.c b/drivers/net/stmmac/dwmac100_dma.c index 2fece7b72727..c7279d2b946b 100644 --- a/drivers/net/stmmac/dwmac100_dma.c +++ b/drivers/net/stmmac/dwmac100_dma.c @@ -31,7 +31,7 @@ #include "dwmac100.h" #include "dwmac_dma.h" -static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, /* Store and Forward capability is not used at all.. * The transmit threshold can be programmed by * setting the TTC bits in the DMA control register.*/ -static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -73,7 +73,7 @@ static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, writel(csr6, ioaddr + DMA_CONTROL); } -static void dwmac100_dump_dma_regs(unsigned long ioaddr) +static void dwmac100_dump_dma_regs(void __iomem *ioaddr) { int i; @@ -91,7 +91,7 @@ static void dwmac100_dump_dma_regs(unsigned long ioaddr) /* DMA controller has two counters to track the number of * the receive missed frames. 
*/ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr) + void __iomem *ioaddr) { struct net_device_stats *stats = (struct net_device_stats *)data; u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR); diff --git a/drivers/net/stmmac/dwmac_dma.h b/drivers/net/stmmac/dwmac_dma.h index 7b815a1b7b8c..da3f5ccf83d3 100644 --- a/drivers/net/stmmac/dwmac_dma.h +++ b/drivers/net/stmmac/dwmac_dma.h @@ -97,12 +97,12 @@ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ #define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ -extern void dwmac_enable_dma_transmission(unsigned long ioaddr); -extern void dwmac_enable_dma_irq(unsigned long ioaddr); -extern void dwmac_disable_dma_irq(unsigned long ioaddr); -extern void dwmac_dma_start_tx(unsigned long ioaddr); -extern void dwmac_dma_stop_tx(unsigned long ioaddr); -extern void dwmac_dma_start_rx(unsigned long ioaddr); -extern void dwmac_dma_stop_rx(unsigned long ioaddr); -extern int dwmac_dma_interrupt(unsigned long ioaddr, +extern void dwmac_enable_dma_transmission(void __iomem *ioaddr); +extern void dwmac_enable_dma_irq(void __iomem *ioaddr); +extern void dwmac_disable_dma_irq(void __iomem *ioaddr); +extern void dwmac_dma_start_tx(void __iomem *ioaddr); +extern void dwmac_dma_stop_tx(void __iomem *ioaddr); +extern void dwmac_dma_start_rx(void __iomem *ioaddr); +extern void dwmac_dma_stop_rx(void __iomem *ioaddr); +extern int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x); diff --git a/drivers/net/stmmac/dwmac_lib.c b/drivers/net/stmmac/dwmac_lib.c index a85415216ef4..d65fab1ba790 100644 --- a/drivers/net/stmmac/dwmac_lib.c +++ b/drivers/net/stmmac/dwmac_lib.c @@ -32,43 +32,43 @@ #endif /* CSR1 enables the transmit DMA to check for new descriptor */ -void dwmac_enable_dma_transmission(unsigned long ioaddr) +void dwmac_enable_dma_transmission(void __iomem *ioaddr) { writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(unsigned long ioaddr) +void dwmac_enable_dma_irq(void __iomem *ioaddr) { writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(unsigned long ioaddr) +void dwmac_disable_dma_irq(void __iomem *ioaddr) { writel(0, ioaddr + DMA_INTR_ENA); } -void dwmac_dma_start_tx(unsigned long ioaddr) +void dwmac_dma_start_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_tx(unsigned long ioaddr) +void dwmac_dma_stop_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_start_rx(unsigned long ioaddr) +void dwmac_dma_start_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_SR; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_rx(unsigned long ioaddr) +void dwmac_dma_stop_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_SR; @@ -145,7 +145,7 @@ static void show_rx_process_state(unsigned int status) } #endif -int dwmac_dma_interrupt(unsigned long ioaddr, +int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x) { int ret = 0; @@ -219,7 +219,7 @@ int dwmac_dma_interrupt(unsigned long ioaddr, return ret; } -void dwmac_dma_flush_tx_fifo(unsigned long ioaddr) +void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) { u32 csr6 = readl(ioaddr + DMA_CONTROL); writel((csr6 | DMA_CONTROL_FTF), ioaddr + DMA_CONTROL); @@ -227,7 +227,7 @@ void 
dwmac_dma_flush_tx_fifo(unsigned long ioaddr) do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF)); } -void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low) { unsigned long data; @@ -238,7 +238,7 @@ void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], writel(data, ioaddr + low); } -void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low) { unsigned int hi_addr, lo_addr; diff --git a/drivers/net/stmmac/enh_desc.c b/drivers/net/stmmac/enh_desc.c index f612f986a7e1..77ff88c3958b 100644 --- a/drivers/net/stmmac/enh_desc.c +++ b/drivers/net/stmmac/enh_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/norm_desc.c b/drivers/net/stmmac/norm_desc.c index 31ad53643792..51f4440ab98b 100644 --- a/drivers/net/stmmac/norm_desc.c +++ b/drivers/net/stmmac/norm_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index ebebc644b1b8..cca53dbac361 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -54,6 +54,7 @@ struct stmmac_priv { unsigned int dma_buf_sz; struct device *device; struct mac_device_info *hw; + void __iomem *ioaddr; struct stmmac_extra_stats xstats; struct napi_struct napi; @@ -65,7 +66,7 @@ struct stmmac_priv { int phy_mask; int (*phy_reset) (void *priv); void (*fix_mac_speed) (void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); void *bsp_priv; int phy_irq; diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c index f080509923f0..63b68e61afce 100644 --- a/drivers/net/stmmac/stmmac_ethtool.c +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -177,21 +177,21 @@ void stmmac_ethtool_gregs(struct net_device *dev, if (!priv->is_gmac) { /* MAC registers */ for (i = 0; i < 12; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 9; i++) reg_space[i + 12] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); - reg_space[22] = readl(dev->base_addr + DMA_CUR_TX_BUF_ADDR); - reg_space[23] = readl(dev->base_addr + DMA_CUR_RX_BUF_ADDR); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); + reg_space[22] = readl(priv->ioaddr + DMA_CUR_TX_BUF_ADDR); + reg_space[23] = readl(priv->ioaddr + DMA_CUR_RX_BUF_ADDR); } else { /* MAC registers */ for (i = 0; i < 55; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 22; i++) reg_space[i + 55] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); } } @@ -263,11 +263,9 @@ stmmac_set_pauseparam(struct net_device *netdev, cmd.phy_address = phy->addr; ret = phy_ethtool_sset(phy, &cmd); } - } else { - unsigned long ioaddr = 
netdev->base_addr; - priv->hw->mac->flow_ctrl(ioaddr, phy->duplex, + } else + priv->hw->mac->flow_ctrl(priv->ioaddr, phy->duplex, priv->flow_ctrl, priv->pause); - } spin_unlock(&priv->lock); return ret; } @@ -276,12 +274,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *data) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int i; /* Update HW stats if supported */ priv->hw->dma->dma_diagnostic_fr(&dev->stats, (void *) &priv->xstats, - ioaddr); + priv->ioaddr); for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index 86b6c69c068c..c59c1061252a 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -202,7 +202,6 @@ static void stmmac_adjust_link(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); struct phy_device *phydev = priv->phydev; - unsigned long ioaddr = dev->base_addr; unsigned long flags; int new_state = 0; unsigned int fc = priv->flow_ctrl, pause_time = priv->pause; @@ -215,7 +214,7 @@ static void stmmac_adjust_link(struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); if (phydev->link) { - u32 ctrl = readl(ioaddr + MAC_CTRL_REG); + u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG); /* Now we make sure that we can be in full duplex mode. * If not, we operate in half-duplex mode. */ @@ -229,7 +228,7 @@ static void stmmac_adjust_link(struct net_device *dev) } /* Flow Control operation */ if (phydev->pause) - priv->hw->mac->flow_ctrl(ioaddr, phydev->duplex, + priv->hw->mac->flow_ctrl(priv->ioaddr, phydev->duplex, fc, pause_time); if (phydev->speed != priv->speed) { @@ -268,7 +267,7 @@ static void stmmac_adjust_link(struct net_device *dev) priv->speed = phydev->speed; } - writel(ctrl, ioaddr + MAC_CTRL_REG); + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); if (!priv->oldlink) { new_state = 1; @@ -345,7 +344,7 @@ static int stmmac_init_phy(struct net_device *dev) return 0; } -static inline void stmmac_mac_enable_rx(unsigned long ioaddr) +static inline void stmmac_mac_enable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_RNABLE_RX; @@ -353,7 +352,7 @@ static inline void stmmac_mac_enable_rx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_enable_tx(unsigned long ioaddr) +static inline void stmmac_mac_enable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_ENABLE_TX; @@ -361,14 +360,14 @@ static inline void stmmac_mac_enable_tx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_rx(unsigned long ioaddr) +static inline void stmmac_mac_disable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_RNABLE_RX; writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_tx(unsigned long ioaddr) +static inline void stmmac_mac_disable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_ENABLE_TX; @@ -577,17 +576,17 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { if (!priv->is_gmac) { /* MAC 10/100 */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, 0); + priv->hw->dma->dma_mode(priv->ioaddr, tc, 0); priv->tx_coe = NO_HW_CSUM; } else { if ((priv->dev->mtu <= ETH_DATA_LEN) && (tx_coe)) { - priv->hw->dma->dma_mode(priv->dev->base_addr, + 
priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE); tc = SF_DMA_MODE; priv->tx_coe = HW_CSUM; } else { /* Checksum computation is performed in software. */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->tx_coe = NO_HW_CSUM; } @@ -603,7 +602,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) static void stmmac_tx(struct stmmac_priv *priv) { unsigned int txsize = priv->dma_tx_size; - unsigned long ioaddr = priv->dev->base_addr; while (priv->dirty_tx != priv->cur_tx) { int last; @@ -621,7 +619,7 @@ static void stmmac_tx(struct stmmac_priv *priv) int tx_error = priv->hw->desc->tx_status(&priv->dev->stats, &priv->xstats, p, - ioaddr); + priv->ioaddr); if (likely(tx_error == 0)) { priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; @@ -677,7 +675,7 @@ static inline void stmmac_enable_irq(struct stmmac_priv *priv) priv->tm->timer_start(tmrate); else #endif - priv->hw->dma->enable_dma_irq(priv->dev->base_addr); + priv->hw->dma->enable_dma_irq(priv->ioaddr); } static inline void stmmac_disable_irq(struct stmmac_priv *priv) @@ -687,7 +685,7 @@ static inline void stmmac_disable_irq(struct stmmac_priv *priv) priv->tm->timer_stop(); else #endif - priv->hw->dma->disable_dma_irq(priv->dev->base_addr); + priv->hw->dma->disable_dma_irq(priv->ioaddr); } static int stmmac_has_work(struct stmmac_priv *priv) @@ -742,14 +740,15 @@ static void stmmac_no_timer_stopped(void) */ static void stmmac_tx_err(struct stmmac_priv *priv) { + netif_stop_queue(priv->dev); - priv->hw->dma->stop_tx(priv->dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); dma_free_tx_skbufs(priv); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); priv->dirty_tx = 0; priv->cur_tx = 0; - priv->hw->dma->start_tx(priv->dev->base_addr); + priv->hw->dma->start_tx(priv->ioaddr); priv->dev->stats.tx_errors++; netif_wake_queue(priv->dev); @@ -758,11 +757,9 @@ static void stmmac_tx_err(struct stmmac_priv *priv) static void stmmac_dma_interrupt(struct stmmac_priv *priv) { - unsigned long ioaddr = priv->dev->base_addr; int status; - status = priv->hw->dma->dma_interrupt(priv->dev->base_addr, - &priv->xstats); + status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats); if (likely(status == handle_tx_rx)) _stmmac_schedule(priv); @@ -770,7 +767,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) /* Try to bump up the dma threshold on this failure */ if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) { tc += 64; - priv->hw->dma->dma_mode(ioaddr, tc, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->xstats.threshold = tc; } stmmac_tx_err(priv); @@ -790,7 +787,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) static int stmmac_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int ret; /* Check that the MAC address is valid. 
If its not, refuse @@ -846,7 +842,8 @@ static int stmmac_open(struct net_device *dev) init_dma_desc_rings(dev); /* DMA initialization and SW reset */ - if (unlikely(priv->hw->dma->init(ioaddr, priv->pbl, priv->dma_tx_phy, + if (unlikely(priv->hw->dma->init(priv->ioaddr, priv->pbl, + priv->dma_tx_phy, priv->dma_rx_phy) < 0)) { pr_err("%s: DMA initialization failed\n", __func__); @@ -854,22 +851,22 @@ static int stmmac_open(struct net_device *dev) } /* Copy the MAC addr into the HW */ - priv->hw->mac->set_umac_addr(ioaddr, dev->dev_addr, 0); + priv->hw->mac->set_umac_addr(priv->ioaddr, dev->dev_addr, 0); /* If required, perform hw setup of the bus. */ if (priv->bus_setup) - priv->bus_setup(ioaddr); + priv->bus_setup(priv->ioaddr); /* Initialize the MAC Core */ - priv->hw->mac->core_init(ioaddr); + priv->hw->mac->core_init(priv->ioaddr); priv->shutdown = 0; /* Initialise the MMC (if present) to disable all interrupts. */ - writel(0xffffffff, ioaddr + MMC_HIGH_INTR_MASK); - writel(0xffffffff, ioaddr + MMC_LOW_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_HIGH_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_LOW_INTR_MASK); /* Enable the MAC Rx/Tx */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); /* Set the HW DMA mode and the COE */ stmmac_dma_operation_mode(priv); @@ -880,16 +877,16 @@ static int stmmac_open(struct net_device *dev) /* Start the ball rolling... */ DBG(probe, DEBUG, "%s: DMA RX/TX processes started...\n", dev->name); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); #endif /* Dump DMA/MAC registers */ if (netif_msg_hw(priv)) { - priv->hw->mac->dump_regs(ioaddr); - priv->hw->dma->dump_regs(ioaddr); + priv->hw->mac->dump_regs(priv->ioaddr); + priv->hw->dma->dump_regs(priv->ioaddr); } if (priv->phydev) @@ -933,15 +930,15 @@ static int stmmac_release(struct net_device *dev) free_irq(dev->irq, dev); /* Stop TX/RX DMA and clear the descriptors */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Release and free the Rx/Tx resources */ free_dma_desc_resources(priv); /* Disable the MAC core */ - stmmac_mac_disable_tx(dev->base_addr); - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); + stmmac_mac_disable_rx(priv->ioaddr); netif_carrier_off(dev); @@ -1143,7 +1140,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - priv->hw->dma->enable_dma_transmission(dev->base_addr); + priv->hw->dma->enable_dma_transmission(priv->ioaddr); return NETDEV_TX_OK; } @@ -1408,11 +1405,9 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) return IRQ_NONE; } - if (priv->is_gmac) { - unsigned long ioaddr = dev->base_addr; + if (priv->is_gmac) /* To handle GMAC own interrupts */ - priv->hw->mac->host_irq_status(ioaddr); - } + priv->hw->mac->host_irq_status((void __iomem *) dev->base_addr); stmmac_dma_interrupt(priv); @@ -1525,7 +1520,8 @@ static int stmmac_probe(struct net_device *dev) netif_napi_add(dev, &priv->napi, stmmac_poll, 64); /* Get the MAC address */ - priv->hw->mac->get_umac_addr(dev->base_addr, dev->dev_addr, 0); + priv->hw->mac->get_umac_addr((void __iomem *) dev->base_addr, + dev->dev_addr, 0); if 
(!is_valid_ether_addr(dev->dev_addr)) pr_warning("\tno valid MAC address;" @@ -1555,14 +1551,13 @@ static int stmmac_probe(struct net_device *dev) static int stmmac_mac_device_setup(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; struct mac_device_info *device; if (priv->is_gmac) - device = dwmac1000_setup(ioaddr); + device = dwmac1000_setup(priv->ioaddr); else - device = dwmac100_setup(ioaddr); + device = dwmac100_setup(priv->ioaddr); if (!device) return -ENOMEM; @@ -1656,7 +1651,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) { int ret = 0; struct resource *res; - unsigned int *addr = NULL; + void __iomem *addr = NULL; struct net_device *ndev = NULL; struct stmmac_priv *priv; struct plat_stmmacenet_data *plat_dat; @@ -1711,6 +1706,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) priv->pbl = plat_dat->pbl; /* TLI */ priv->is_gmac = plat_dat->has_gmac; /* GMAC is on board */ priv->enh_desc = plat_dat->enh_desc; + priv->ioaddr = addr; platform_set_drvdata(pdev, ndev); @@ -1782,11 +1778,11 @@ static int stmmac_dvr_remove(struct platform_device *pdev) pr_info("%s:\n\tremoving driver", __func__); - priv->hw->dma->stop_rx(ndev->base_addr); - priv->hw->dma->stop_tx(ndev->base_addr); + priv->hw->dma->stop_rx(priv->ioaddr); + priv->hw->dma->stop_tx(priv->ioaddr); - stmmac_mac_disable_rx(ndev->base_addr); - stmmac_mac_disable_tx(ndev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); + stmmac_mac_disable_tx(priv->ioaddr); netif_carrier_off(ndev); @@ -1795,7 +1791,7 @@ static int stmmac_dvr_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); unregister_netdev(ndev); - iounmap((void *)ndev->base_addr); + iounmap((void *)priv->ioaddr); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -1830,22 +1826,21 @@ static int stmmac_suspend(struct platform_device *pdev, pm_message_t state) napi_disable(&priv->napi); /* Stop TX/RX DMA */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Clear the Rx/Tx descriptors */ priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size, dis_ic); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); - stmmac_mac_disable_tx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); if (device_may_wakeup(&(pdev->dev))) { /* Enable Power down mode by programming the PMT regs */ if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, - priv->wolopts); + priv->hw->mac->pmt(priv->ioaddr, priv->wolopts); } else { - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); } } else { priv->shutdown = 1; @@ -1863,7 +1858,6 @@ static int stmmac_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; if (!netif_running(dev)) return 0; @@ -1884,15 +1878,15 @@ static int stmmac_resume(struct platform_device *pdev) * from another devices (e.g. serial console). 
*/ if (device_may_wakeup(&(pdev->dev))) if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, 0); + priv->hw->mac->pmt(priv->ioaddr, 0); netif_device_attach(dev); /* Enable the MAC and DMA */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); diff --git a/drivers/net/stmmac/stmmac_mdio.c b/drivers/net/stmmac/stmmac_mdio.c index 40b2c7929719..03dea1401571 100644 --- a/drivers/net/stmmac/stmmac_mdio.c +++ b/drivers/net/stmmac/stmmac_mdio.c @@ -47,7 +47,6 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -56,12 +55,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) ((phyreg << 6) & (0x000007C0))); regValue |= MII_BUSY; /* in case of GMAC */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); - writel(regValue, ioaddr + mii_address); - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); + writel(regValue, priv->ioaddr + mii_address); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Read the data from the MII data register */ - data = (int)readl(ioaddr + mii_data); + data = (int)readl(priv->ioaddr + mii_data); return data; } @@ -79,7 +78,6 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -90,14 +88,14 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, value |= MII_BUSY; /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Set the MII address register to write */ - writel(phydata, ioaddr + mii_data); - writel(value, ioaddr + mii_address); + writel(phydata, priv->ioaddr + mii_data); + writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); return 0; } @@ -111,7 +109,6 @@ static int stmmac_mdio_reset(struct mii_bus *bus) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; if (priv->phy_reset) { @@ -123,7 +120,7 @@ static int stmmac_mdio_reset(struct mii_bus *bus) * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. 
*/ - writel(0, ioaddr + mii_address); + writel(0, priv->ioaddr + mii_address); return 0; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 632ff7c03280..a4adf0de6ed6 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -35,7 +35,7 @@ struct plat_stmmacenet_data { int has_gmac; int enh_desc; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS struct stm_pad_config *pad_config; #endif -- cgit v1.2.3 From 2971944582ff43b7dedbb460777052243ac9915a Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 9 Aug 2010 12:54:43 +0100 Subject: ARM: 6307/1: mmci: allow the card detect GPIO value not to be inverted On some platforms, the GPIO value from the gpio_cd pin doesn't need to be inverted to get it active high. Add a cd_invert platform data parameter and change existing platforms using GPIO for CD (only Realview) to enable it. Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mach-realview/core.c | 2 ++ drivers/mmc/host/mmci.c | 5 +++-- include/linux/amba/mmci.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 2fa38df28414..07c08151dfe6 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -259,6 +259,7 @@ struct mmci_platform_data realview_mmc0_plat_data = { .status = realview_mmc_status, .gpio_wp = 17, .gpio_cd = 16, + .cd_invert = true, }; struct mmci_platform_data realview_mmc1_plat_data = { @@ -266,6 +267,7 @@ struct mmci_platform_data realview_mmc1_plat_data = { .status = realview_mmc_status, .gpio_wp = 19, .gpio_cd = 18, + .cd_invert = true, }; /* diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 840b301b5671..9a9aeac50a6c 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -570,12 +570,13 @@ static int mmci_get_ro(struct mmc_host *mmc) static int mmci_get_cd(struct mmc_host *mmc) { struct mmci_host *host = mmc_priv(mmc); + struct mmci_platform_data *plat = host->plat; unsigned int status; if (host->gpio_cd == -ENOSYS) - status = host->plat->status(mmc_dev(host->mmc)); + status = plat->status(mmc_dev(host->mmc)); else - status = !gpio_get_value(host->gpio_cd); + status = !!gpio_get_value(host->gpio_cd) ^ plat->cd_invert; /* * Use positive logic throughout - status is zero for no card, diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index ca84ce70d5d5..f4ee9acc9721 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -24,6 +24,7 @@ * whether a card is present in the MMC slot or not * @gpio_wp: read this GPIO pin to see if the card is write protected * @gpio_cd: read this GPIO pin to detect card insertion + * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h */ @@ -35,6 +36,7 @@ struct mmci_platform_data { unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + bool cd_invert; unsigned long capabilities; }; -- cgit v1.2.3 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. 
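The usual motivation for the do { } while (0) form, sketched with a hypothetical macro rather than the exact SCTP definitions: it makes a guarded, multi-statement macro expand to a single statement, so a trailing semicolon and a surrounding if/else parse the way the caller expects.

	/* Hypothetical illustration of the idiom; 'debug_flag' stands in
	 * for sctp_debug_flag. Without the guard, the 'else' in
	 *
	 *	if (cond)
	 *		DBG_PRINT("x\n");
	 *	else
	 *		do_other();
	 *
	 * would bind to the 'if (debug_flag)' hidden inside the macro.
	 */
	#define DBG_PRINT(fmt, args...)				\
	do {							\
		if (debug_flag)					\
			printk(KERN_DEBUG fmt, ##args);		\
	} while (0)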
Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ net/sctp/associola.c | 2 ++ net/sctp/chunk.c | 2 ++ net/sctp/inqueue.c | 2 ++ net/sctp/ipv6.c | 4 +++- net/sctp/objcnt.c | 5 +++-- net/sctp/output.c | 2 ++ net/sctp/outqueue.c | 34 ++++++++++++++++++---------------- net/sctp/probe.c | 4 +++- net/sctp/protocol.c | 17 ++++++++--------- net/sctp/sm_make_chunk.c | 2 ++ net/sctp/sm_sideeffect.c | 21 ++++++++++----------- net/sctp/sm_statefuns.c | 20 ++++++++++---------- net/sctp/sm_statetable.c | 42 +++++++++++++++++++++--------------------- net/sctp/socket.c | 39 +++++++++++++++------------------------ net/sctp/transport.c | 9 +++++---- 16 files changed, 136 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d00..2cb3980b1616 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b85e5256434..5f1fb8bd862d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 476caaf100ed..6c8556459a75 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -37,6 +37,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index ccb6dc48d15b..397296fb156f 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -43,6 +43,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 732689140fb8..95e0c8eda1a0 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, memcpy(saddr, baddr, sizeof(union sctp_addr)); SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { - printk(KERN_ERR "%s: asoc:%p Could not find a valid source " + pr_err("%s: asoc:%p Could not find a valid source " "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index f73ec0ea93ba..8ef8e7d9eb61 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -38,6 +38,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void) ent = proc_create("sctp_dbg_objcnt", 0, proc_net_sctp, &sctp_objcnt_ops); if (!ent) - printk(KERN_WARNING - "sctp_dbg_objcnt: Unable to create /proc entry.\n"); + pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5acd..901764b17aee 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -41,6 +41,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c04b2eb59186..8c6d379b4bb6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include /* For struct list_head */ #include @@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... 
*/ default: @@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, switch (dbg_prt_state) { case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } break; case 1: if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } } #endif /* SCTP_DEBUG */ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index db3a42b8b349..2e63e9dc010e 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -22,6 +22,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -192,7 +194,7 @@ static __init int sctpprobe_init(void) if (ret) goto remove_proc; - pr_info("SCTP probe registered (port=%d)\n", port); + pr_info("probe registered (port=%d)\n", port); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5027b83f1cc0..f774e657641a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void) &init_net); if (err < 0) { - printk(KERN_ERR - "SCTP: Failed to create the SCTP control socket.\n"); + pr_err("Failed to create the SCTP control socket\n"); return err; } return 0; @@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { - printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); + pr_err("Failed association hash alloc\n"); status = -ENOMEM; goto err_ahash_alloc; } @@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_ep_hashtable = (struct sctp_hashbucket *) kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) { - printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); + pr_err("Failed endpoint_hash alloc\n"); status = -ENOMEM; goto err_ehash_alloc; } @@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { - printk(KERN_ERR "SCTP: Failed bind hash alloc."); + pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } @@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - printk(KERN_INFO "SCTP: Hash tables configured " - "(established %d bind %d)\n", + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); /* Disable ADDIP by default. */ @@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init())) { - printk (KERN_ERR - "SCTP: Failed to initialize the SCTP control sock.\n"); + pr_err("Failed to initialize the SCTP control sock\n"); goto err_ctl_sock_init; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 246f92924658..2cc46f0962ca 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f5e5e27cac5e..b21b218d564f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, case SCTP_DISPOSITION_VIOLATION: if (net_ratelimit()) - printk(KERN_ERR "sctp protocol violation state %d " - "chunkid %d\n", state, subtype.chunk); + pr_err("protocol violation state %d chunkid %d\n", + state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: - printk(KERN_WARNING "sctp unimplemented feature in state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n", + state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_BUG: - printk(KERN_ERR "sctp bug in state %d, " - "event_type %d, event_id %d\n", + pr_err("bug in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); BUG(); break; default: - printk(KERN_ERR "sctp impossible disposition %d " - "in state %d, event_type %d, event_id %d\n", + pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); BUG(); break; @@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_send_asconf(asoc); break; default: - printk(KERN_WARNING "Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u, %p\n", + cmd->verb, cmd->obj.ptr); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd555637..8b284436be65 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI6\n", - __func__, - asoc, - &from_addr.v6.sin6_addr); + pr_warn("%s association %p could not find address %pI6\n", + __func__, + asoc, + &from_addr.v6.sin6_addr); } else { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI4\n", - __func__, - asoc, - &from_addr.v4.sin_addr.s_addr); + pr_warn("%s association %p could not find address %pI4\n", + __func__, + asoc, + &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 6d9b3aafcc5d..546d4387fb3c 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = { .name = "sctp_sf_bug" }; -#define DO_LOOKUP(_max, _type, _table) \ - if ((event_subtype._type > (_max))) { \ - printk(KERN_WARNING \ - "sctp table %p possible attack:" \ - " event %d exceeds max %d\n", \ - _table, event_subtype._type, _max); \ - return &bug; \ - } \ - return &_table[event_subtype._type][(int)state]; +#define DO_LOOKUP(_max, _type, _table) \ +({ \ + const sctp_sm_table_entry_t *rtn; \ + \ + if ((event_subtype._type > (_max))) { \ + pr_warn("table %p possible attack: event %d exceeds max %d\n", \ + _table, event_subtype._type, _max); \ + rtn = &bug; \ + } else \ + rtn = &_table[event_subtype._type][(int)state]; \ + \ + rtn; \ +}) const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, sctp_state_t state, @@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, switch (event_type) { case SCTP_EVENT_T_CHUNK: return sctp_chunk_event_lookup(event_subtype.chunk, state); - break; case SCTP_EVENT_T_TIMEOUT: - DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, - timeout_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, + timeout_event_table); case SCTP_EVENT_T_OTHER: - DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, + other_event_table); case SCTP_EVENT_T_PRIMITIVE: - DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, - primitive_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, + primitive_event_table); default: /* Yikes! We got an illegal event type. */ return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca44917872d2..f4bec2772351 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -4281,9 +4278,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else @@ -4929,10 +4925,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5023,10 +5017,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5586,8 +5578,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { - printk(KERN_INFO - "SCTP: failed to load transform for %s: %ld\n", + pr_info("failed to load 
transform for %s: %ld\n", sctp_hmac_alg, PTR_ERR(tfm)); } return -ENOSYS; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 132046cb82fc..d3ae493d234a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) struct dst_entry *dst; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, + SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ -- cgit v1.2.3 From 480125ba49ba62be93beea37770f266846e077ab Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Aug 2010 13:57:57 -0400 Subject: x86, iommu: Make all IOMMU's detection routines return a value. We return 1 if the IOMMU has been detected, and zero or an error number if we failed to find it. This is in preparation for using the IOMMU_INIT so that we can detect whether an IOMMU is present. I have not tested this for regression on Calgary, nor on AMD Vi chipsets as I don't have that hardware. CC: Muli Ben-Yehuda CC: "Jon D. Mason" CC: "Darrick J. Wong" CC: Jesse Barnes CC: David Woodhouse CC: Chris Wright CC: Yinghai Lu CC: Joerg Roedel CC: H. Peter Anvin CC: Fujita Tomonori Signed-off-by: Konrad Rzeszutek Wilk LKML-Reference: <1282845485-8991-3-git-send-email-konrad.wilk@oracle.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/amd_iommu.h | 4 ++-- arch/x86/include/asm/calgary.h | 4 ++-- arch/x86/include/asm/gart.h | 5 +++-- arch/x86/kernel/amd_iommu_init.c | 8 +++++--- arch/x86/kernel/aperture_64.c | 11 +++++++---- arch/x86/kernel/pci-calgary_64.c | 15 ++++++++------- drivers/pci/dmar.c | 4 +++- include/linux/dmar.h | 6 +++--- 8 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b5..2798142cdb49 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -24,11 +24,11 @@ #ifdef CONFIG_AMD_IOMMU -extern void amd_iommu_detect(void); +extern int amd_iommu_detect(void); #else -static inline void amd_iommu_detect(void) { } +static inline int amd_iommu_detect(void) { return -ENODEV; } #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index 0918654305af..0d467b338835 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,9 +62,9 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern void detect_calgary(void); +extern int detect_calgary(void); #else -static inline void detect_calgary(void) { return; } +static inline int detect_calgary(void) { return -ENODEV; } #endif #endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc1..d7d1d4c438a4 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern void gart_iommu_hole_init(void); +extern int gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ 
-50,8 +50,9 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline void gart_iommu_hole_init(void) +static inline int gart_iommu_hole_init(void) { + return -ENODEV; } #endif diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd4..0b9e2dc4fc9a 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1382,13 +1382,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) return 0; } -void __init amd_iommu_detect(void) +int __init amd_iommu_detect(void) { if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + return -ENODEV; if (amd_iommu_disabled) - return; + return -ENODEV; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; @@ -1397,7 +1397,9 @@ void __init amd_iommu_detect(void) /* Make sure ACS will be enabled */ pci_request_acs(); + return 1; } + return -ENODEV; } /**************************************************************************** diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf26e17..afa0dab3302f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void) static int __initdata printed_gart_size_msg; -void __init gart_iommu_hole_init(void) +int __init gart_iommu_hole_init(void) { u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; @@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void) if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) - return; + return -ENODEV; printk(KERN_INFO "Checking aperture...\n"); @@ -463,8 +463,9 @@ out: unsigned long n = (32 * 1024 * 1024) << last_aper_order; insert_aperture_resource((u32)last_aper_base, n); + return 1; } - return; + return 0; } if (!fallback_aper_force) { @@ -500,7 +501,7 @@ out: panic("Not enough memory for aperture"); } } else { - return; + return 0; } /* Fix up the north bridges */ @@ -524,4 +525,6 @@ out: } set_up_gart_resume(aper_order, aper_alloc); + + return 1; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..28c6b389fee6 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1364,7 +1364,7 @@ static int __init calgary_iommu_init(void) return 0; } -void __init detect_calgary(void) +int __init detect_calgary(void) { int bus; void *tbl; @@ -1378,13 +1378,13 @@ void __init detect_calgary(void) * another HW IOMMU already, bail out. */ if (no_iommu || iommu_detected) - return; + return -ENODEV; if (!use_calgary) - return; + return -ENODEV; if (!early_pci_allowed()) - return; + return -ENODEV; printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); @@ -1410,13 +1410,13 @@ void __init detect_calgary(void) if (!rio_table_hdr) { printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " "in EBDA - bailing!\n"); - return; + return -ENODEV; } ret = build_detail_arrays(); if (ret) { printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return; + return -ENOMEM; } specified_table_size = determine_tce_table_size((is_kdump_kernel() ? 
@@ -1464,7 +1464,7 @@ void __init detect_calgary(void) x86_init.iommu.iommu_init = calgary_iommu_init; } - return; + return calgary_found; cleanup: for (--bus; bus >= 0; --bus) { @@ -1473,6 +1473,7 @@ cleanup: if (info->tce_space) free_tce_table(info->tce_space); } + return -ENOMEM; } static int __init calgary_parse_options(char *p) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0a19708074c2..5fa64ea5416f 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -687,7 +687,7 @@ failed: return 0; } -void __init detect_intel_iommu(void) +int __init detect_intel_iommu(void) { int ret; @@ -723,6 +723,8 @@ void __init detect_intel_iommu(void) } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; + + return (ret ? 1 : -ENODEV); } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed34..a20602041511 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -57,15 +57,15 @@ extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -extern void detect_intel_iommu(void); +extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else -static inline void detect_intel_iommu(void) +static inline int detect_intel_iommu(void) { - return; + return -ENODEV; } static inline int dmar_table_init(void) -- cgit v1.2.3 From 04cbe1de6fbda9649a6f25666194e6955d3e717e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Aug 2010 21:29:43 +0100 Subject: vgaarb: Wrap vga_(get|put) in CONFIG_VGA_ARB Fix link failure without the vga arbitrator. Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 6228b5b77d35..e9e1524b582c 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -93,8 +93,11 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev, * Nested calls are supported (a per-resource counter is maintained) */ -extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, - int interruptible); +#if defined(CONFIG_VGA_ARB) +extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); +#else +static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { return 0; } +#endif /** * vga_get_interruptible @@ -131,7 +134,11 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, * are already locked by another card. It can be called in any context */ +#if defined(CONFIG_VGA_ARB) extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); +#else +static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } +#endif /** * vga_put - release lock on legacy VGA resources @@ -146,7 +153,11 @@ extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); * released if the counter reaches 0. */ +#if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); +#else +#define vga_put(pdev, rsrc) +#endif /** -- cgit v1.2.3 From 40d0802b3eb47d57e2d57a5244a18cbbe9632e13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Aug 2010 22:03:08 -0700 Subject: gro: __napi_gro_receive() optimizations compare_ether_header() can have a special implementation on 64 bit arches if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. 
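(As an illustrative aside, not part of the patch: the overlapping-XOR fold described above is easy to demonstrate in stand-alone user-space C. hdr_diff() is a made-up name, and memcpy() stands in for the kernel's unaligned loads, so the sketch is safe even without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Compare two 14-byte Ethernet headers with two 64-bit XORs whose
 * loads overlap by two bytes: offsets 0..7 and 6..13 together cover
 * bytes 0..13.  Bytes 6 and 7 are XORed twice, which cannot change a
 * zero/non-zero result, so the fold is zero iff the headers match.
 */
static uint64_t hdr_diff(const void *a, const void *b)
{
	uint64_t a0, a6, b0, b6;

	memcpy(&a0, a, 8);
	memcpy(&a6, (const unsigned char *)a + 6, 8);
	memcpy(&b0, b, 8);
	memcpy(&b6, (const unsigned char *)b + 6, 8);

	return (a0 ^ b0) | (a6 ^ b6);	/* zero iff all 14 bytes match */
}

int main(void)
{
	unsigned char h1[14] = "0123456789abc", h2[14] = "0123456789abc";

	printf("identical headers -> diff=%llu\n",
	       (unsigned long long)hdr_diff(h1, h2));
	h2[13] ^= 0x01;	/* flip one bit in the last byte */
	printf("one bit apart     -> %s\n",
	       hdr_diff(h1, h2) ? "mismatch" : "match");
	return 0;
}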
__napi_gro_receive() and vlan_gro_common() can avoid a conditional branch to perform device match. On x86_64, __napi_gro_receive() now has 38 instructions instead of 53. As gcc-4.4.3 still chooses not to inline it, add the inline keyword to this performance-critical function. Signed-off-by: Eric Dumazet CC: Herbert Xu Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 +++++++++++++++++- net/8021q/vlan_core.c | 9 ++++++--- net/core/dev.c | 10 ++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2308fbb4523a..fb6aa6070921 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -237,13 +237,29 @@ static inline bool is_etherdev_addr(const struct net_device *dev, * entry points. */ -static inline int compare_ether_header(const void *a, const void *b) +static inline unsigned long compare_ether_header(const void *a, const void *b) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + unsigned long fold; + + /* + * We want to compare 14 bytes: + * [a0 ... a13] ^ [b0 ... b13] + * Use two long XOR, ORed together, with an overlap of two bytes. + * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | + * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] + * This means the [a6 a7] ^ [b6 b7] part is done two times. + */ + fold = *(unsigned long *)a ^ *(unsigned long *)b; + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; +#else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); +#endif } #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 07eeb5b99dce..3438c01bbacf 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -105,9 +105,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, goto drop; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - p->dev == skb->dev && !compare_ether_header( - skb_mac_header(p), skb_gro_mac_header(skb)); + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 859e30ff044a..63bd20a75929 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3169,16 +3169,18 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static gro_result_t +static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - (p->dev == skb->dev) && - !compare_ether_header(skb_mac_header(p), + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently led to more complex
code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- net/mac80211/ieee80211_i.h | 6 ++++++ net/mac80211/scan.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986e..8f97548b6d80 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9e225f01497b..31713320258c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -596,11 +596,17 @@ enum queue_stop_reason { * determine if we are on the operating channel or not * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, * gets only set in conjunction with SCAN_SW_SCANNING + * @SCAN_COMPLETED: Set for our scan work function when the driver reported + * that the scan completed. + * @SCAN_ABORTED: Set for our scan work function when the driver reported + * a scan complete for an aborted scan. */ enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_OFF_CHANNEL, + SCAN_COMPLETED, + SCAN_ABORTED, }; /** diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 31f233f7f51a..d60389ba9b95 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -248,13 +248,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); bool was_hw_scan; - trace_api_scan_completed(local, aborted); - mutex_lock(&local->mtx); /* @@ -312,6 +310,18 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mesh_notify_scan_completed(local); ieee80211_queue_work(&local->hw, &local->work_work); } + +void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_scan_completed(local, aborted); + + set_bit(SCAN_COMPLETED, &local->scanning); + if (aborted) + set_bit(SCAN_ABORTED, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); +} EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local) @@ -449,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, /* if no more bands/channels left, complete scan and advance to the idle state */ if (local->scan_channel_idx >= local->scan_req->n_channels) { - ieee80211_scan_completed(&local->hw, false); + __ieee80211_scan_completed(&local->hw, false); return 1; } @@ -641,6 +651,14 @@ void ieee80211_scan_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = local->scan_sdata; unsigned long next_delay = 0; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { + bool aborted; + 
+ aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); + __ieee80211_scan_completed(&local->hw, aborted); + return; + } + mutex_lock(&local->mtx); if (!sdata || !local->scan_req) { mutex_unlock(&local->mtx); @@ -651,7 +669,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -666,7 +684,7 @@ void ieee80211_scan_work(struct work_struct *work) mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -676,7 +694,7 @@ void ieee80211_scan_work(struct work_struct *work) * Avoid re-scheduling when the sdata is going away. */ if (!ieee80211_sdata_running(sdata)) { - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -783,5 +801,5 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (abortscan) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); } -- cgit v1.2.3 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 16 +++++++++++++++- include/net/cfg80211.h | 24 +++++++++++++++++------- net/wireless/nl80211.c | 25 ++++++++++++++++++++++--- net/wireless/wext-sme.c | 2 ++ 4 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ec1690da7845..31603e8b5581 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -686,6 +688,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. 
+ * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -951,6 +962,9 @@ enum nl80211_attrs { NL80211_ATTR_RX_FRAME_TYPES, NL80211_ATTR_FRAME_TYPE, + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d6..4c8c727d0cca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. 
*/ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49f5ca35e787..85a23de7bff3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, + [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, @@ -474,6 +476,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, dev->wiphy.max_num_pmkids); + if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) + NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -3691,7 +3696,8 @@ unlock_rtnl: return err; } -static int nl80211_crypto_settings(struct genl_info *info, +static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, + struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit) { @@ -3699,6 +3705,19 @@ static int nl80211_crypto_settings(struct genl_info *info, settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { + u16 proto; + proto = nla_get_u16( + info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); + settings->control_port_ethertype = cpu_to_be16(proto); + if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + proto != ETH_P_PAE) + return -EINVAL; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]) + settings->control_port_no_encrypt = true; + } else + settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE); + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { void *data; int len, i; @@ -3826,7 +3845,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); - err = nl80211_crypto_settings(info, &crypto, 1); + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, @@ -4303,7 +4322,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; - err = nl80211_crypto_settings(info, &connect.crypto, + err = nl80211_crypto_settings(rdev, info, &connect.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 9818198add8a..6fffe62d7c25 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -197,6 +197,8 @@ int 
cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.ssid_len = len; wdev->wext.connect.crypto.control_port = false; + wdev->wext.connect.crypto.control_port_ethertype = + cpu_to_be16(ETH_P_PAE); err = cfg80211_mgd_wext_connect(rdev, wdev); out: -- cgit v1.2.3 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain our own __SDATA_RUNNING bit that will not be set during an interface type change, so that other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++ net/mac80211/cfg.c | 3 - net/mac80211/driver-ops.h | 14 ++++ net/mac80211/driver-trace.h | 25 +++++++ net/mac80211/ieee80211_i.h | 14 +++- net/mac80211/iface.c | 157 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 185 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d80..f91fc331369b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * + * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present.
@@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f82b18e996b2..5de1ca3f17b9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -52,9 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 14123dce544b..6064b7b09e01 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local, return ret; } +static inline int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + int ret; + + might_sleep(); + + trace_drv_change_interface(local, sdata, type); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type); + trace_drv_return_int(local, ret); + return ret; +} + static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index b5a95582d816..f6f3d89e43fa 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -136,6 +136,31 @@ TRACE_EVENT(drv_add_interface, ) ); +TRACE_EVENT(drv_change_interface, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type), + + TP_ARGS(local, sdata, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u32, new_type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->new_type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " new type:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type + ) +); + TRACE_EVENT(drv_remove_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f64837788681..d529bd5eab47 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -472,6 +472,16 @@ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), }; +/** + * enum ieee80211_sdata_state_bits - virtual interface state bits + * @SDATA_STATE_RUNNING: virtual interface is up & running; this + * mirrors netif_running() but is separate for interface type + * change handling while the interface is up + */ +enum ieee80211_sdata_state_bits { + SDATA_STATE_RUNNING, +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -485,6 +495,8 @@ struct ieee80211_sub_if_data { unsigned int flags; + unsigned long state; + int drop_unencrypted; char name[IFNAMSIZ]; @@ -1087,7 +1099,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { - return netif_running(sdata->dev); + return test_bit(SDATA_STATE_RUNNING, &sdata->state); } /* tx handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cba3d806d722..c1cc200ac81f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ 
-148,7 +148,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_open(struct net_device *dev) +/* + * NOTE: Be very careful when changing this function, it must NOT return + * an error on interface type changes that have been pre-checked, so most + * checks should be in ieee80211_check_concurrent_iface. + */ +static int ieee80211_do_open(struct net_device *dev, bool coming_up) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -157,15 +162,6 @@ static int ieee80211_open(struct net_device *dev) int res; u32 hw_reconf_flags = 0; - /* fail early if user set an invalid address */ - if (!is_zero_ether_addr(dev->dev_addr) && - !is_valid_ether_addr(dev->dev_addr)) - return -EADDRNOTAVAIL; - - res = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); - if (res) - return res; - switch (sdata->vif.type) { case NL80211_IFTYPE_WDS: if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) @@ -258,9 +254,11 @@ static int ieee80211_open(struct net_device *dev) netif_carrier_on(dev); break; default: - res = drv_add_interface(local, &sdata->vif); - if (res) - goto err_stop; + if (coming_up) { + res = drv_add_interface(local, &sdata->vif); + if (res) + goto err_stop; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; @@ -316,7 +314,9 @@ static int ieee80211_open(struct net_device *dev) hw_reconf_flags |= __ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - local->open_count++; + if (coming_up) + local->open_count++; + if (hw_reconf_flags) { ieee80211_hw_config(local, hw_reconf_flags); /* @@ -331,6 +331,8 @@ static int ieee80211_open(struct net_device *dev) netif_tx_start_all_queues(dev); + set_bit(SDATA_STATE_RUNNING, &sdata->state); + return 0; err_del_interface: drv_remove_interface(local, &sdata->vif); @@ -344,19 +346,38 @@ static int ieee80211_open(struct net_device *dev) return res; } -static int ieee80211_stop(struct net_device *dev) +static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int err; + + /* fail early if user set an invalid address */ + if (!is_zero_ether_addr(dev->dev_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); + if (err) + return err; + + return ieee80211_do_open(dev, true); +} + +static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, + bool going_down) +{ struct ieee80211_local *local = sdata->local; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + clear_bit(SDATA_STATE_RUNNING, &sdata->state); + /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(dev); + netif_tx_stop_all_queues(sdata->dev); /* * Purge work for this interface. 
@@ -394,11 +415,12 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_AP) local->fif_pspoll--; - netif_addr_lock_bh(dev); + netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(sdata->dev); ieee80211_configure_filter(local); @@ -432,7 +454,8 @@ static int ieee80211_stop(struct net_device *dev) WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - local->open_count--; + if (going_down) + local->open_count--; switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: @@ -504,7 +527,8 @@ static int ieee80211_stop(struct net_device *dev) */ ieee80211_free_keys(sdata); - drv_remove_interface(local, &sdata->vif); + if (going_down) + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; @@ -540,6 +564,13 @@ static int ieee80211_stop(struct net_device *dev) } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +static int ieee80211_stop(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_do_stop(sdata, true); return 0; } @@ -857,9 +888,72 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } +static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = sdata->local; + int ret, err; + + ASSERT_RTNL(); + + if (!local->ops->change_interface) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could maybe also all others here? + * Just not sure how that interacts + * with the RX/config path e.g. for + * mesh. + */ + break; + default: + return -EBUSY; + } + + switch (type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could probably support everything + * but WDS here (WDS do_open can fail + * under memory pressure, which this + * code isn't prepared to handle). + */ + break; + default: + return -EBUSY; + } + + ret = ieee80211_check_concurrent_iface(sdata, type); + if (ret) + return ret; + + ieee80211_do_stop(sdata, false); + + ieee80211_teardown_sdata(sdata->dev); + + ret = drv_change_interface(local, sdata, type); + if (ret) + type = sdata->vif.type; + + ieee80211_setup_sdata(sdata, type); + + err = ieee80211_do_open(sdata->dev, false); + WARN(err, "type change: do_open returned %d", err); + + return ret; +} + int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { + int ret; + ASSERT_RTNL(); if (type == sdata->vif.type) @@ -870,18 +964,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; - /* - * We could, here, on changes between IBSS/STA/MESH modes, - * invoke an MLME function instead that disassociates etc. - * and goes into the requested mode. - */ - - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - - /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); - ieee80211_setup_sdata(sdata, type); + if (ieee80211_sdata_running(sdata)) { + ret = ieee80211_runtime_change_iftype(sdata, type); + if (ret) + return ret; + } else { + /* Purge and reset type-dependent state. 
*/ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + } /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = -- cgit v1.2.3 From 7950c407c0288b223a200c1bba8198941599ca37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:14 -0700 Subject: memblock: Add memblock_free/reserve_reserved_regions() So we can avoid exporting memblock_reserved_init_regions() Suggested by Ben. -v2: use __init_memblock attribute Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 ++ mm/memblock.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4df09bdcae42..7d285271130d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -48,6 +48,8 @@ extern int memblock_can_resize; if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +int memblock_free_reserved_regions(void); +int memblock_reserve_reserved_regions(void); extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index b7ab10a2ef46..65e3ba8d09fb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -170,6 +170,30 @@ u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 ali return memblock_find_base(size, align, start, end); } +/* + * Free memblock.reserved.regions + */ +int __init_memblock memblock_free_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_free(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + +/* + * Reserve memblock.reserved.regions + */ +int __init_memblock memblock_reserve_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_reserve(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From edbe7d23b4482e7f33179290bcff3b1feae1c5f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: memblock: Add find_memory_core_early() Go over the node ranges in early_node_map[] and use __memblock_find_in_range to find a free range. Will be used by memblock_x86_find_in_range_node(), which in turn will be used to find the right buffer for NODE_DATA. Signed-off-by: Yinghai Lu Signed-off-by: H.
Peter Anvin --- include/linux/mm.h | 2 ++ mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b910..993e85f0afcb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,6 +1164,8 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9bd339eb04c6..8c9b34674d83 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3612,6 +3613,41 @@ void __init free_bootmem_with_active_regions(int nid, } } +#ifdef CONFIG_HAVE_MEMBLOCK +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit) +{ + int i; + + /* Need to go over early_node_map to find out good range for node */ + for_each_active_range_index_in_nid(i, nid) { + u64 addr; + u64 ei_start, ei_last; + u64 final_start, final_end; + + ei_last = early_node_map[i].end_pfn; + ei_last <<= PAGE_SHIFT; + ei_start = early_node_map[i].start_pfn; + ei_start <<= PAGE_SHIFT; + + final_start = max(ei_start, goal); + final_end = min(ei_last, limit); + + if (final_start >= final_end) + continue; + + addr = memblock_find_in_range(final_start, final_end, size, align); + + if (addr == MEMBLOCK_ERROR) + continue; + + return addr; + } + + return MEMBLOCK_ERROR; +} +#endif + int __init add_from_early_node_map(struct range *range, int az, int nr_range, int nid) { -- cgit v1.2.3 From a587d2daebcd2bc159d4348b6a7b028950a6d803 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove not used early_res code and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/e820.h | 14 -- arch/x86/kernel/e820.c | 52 ---- include/linux/early_res.h | 23 -- kernel/Makefile | 1 - kernel/early_res.c | 590 -------------------------------------------- 5 files changed, 680 deletions(-) delete mode 100644 include/linux/early_res.h delete mode 100644 kernel/early_res.c (limited to 'include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 388fed291467..718646384e03 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -112,31 +112,17 @@ static inline void early_memtest(unsigned long start, unsigned long end) } #endif -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void memblock_x86_fill(void); - extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -void reserve_early(u64 start, u64 end, char *name); -void free_early(u64 start, u64 end); - /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a9221d18a5ed..d5fd89462d79 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -738,32 +738,6 @@ static int __init e820_mark_nvs_memory(void) core_initcall(e820_mark_nvs_memory); #endif -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - u64 mem = memblock_find_in_range(start, end, size, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL; - - return mem; -} - -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL - - return mem; -} - /* * pre allocated 4k and reserved it in memblock and e820_saved */ @@ -856,32 +830,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - memblock_x86_register_active_regions(nid, start_pfn, last_pfn); -} - -/* - * Find the hole size (in bytes) in the memory range. 
- * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - return memblock_x86_hole_size(start, end); -} - -void reserve_early(u64 start, u64 end, char *name) -{ - memblock_x86_reserve_range(start, end, name); -} -void free_early(u64 start, u64 end) -{ - memblock_x86_free_range(start, end); -} - static void early_panic(char *msg) { early_printk(msg); diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13c..000000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 057472fbc272..80e61c3e44ae 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/early_res.c b/kernel/early_res.c deleted file mode 100644 index 7bfae887f211..000000000000 --- a/kernel/early_res.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. 
- */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? */ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. - */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. - */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? 
name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? */ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = get_max_mapped(); - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. 
- */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - if (start == end) - return; - - if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) - return; - -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? */ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; - int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; - end = get_max_mapped(); -#ifdef MAX_DMA32_PFN - if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = clean_sort_range(range, count); - - /* need to clear it ? 
*/ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. 
- * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} -- cgit v1.2.3 From 0fb85621df4f9f7c663c6c77c302e821a832c95e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Aug 2010 10:02:15 +0100 Subject: fanotify: resize pid and reorder structure resize pid and reorder the fanotify_event_metadata so it is naturally aligned and we can work towards dropping the packed attribute Signed-off-by: Tvrtko Ursulin Cc: Andreas Dilger Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 985435622ecd..63531a6b4d2a 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -65,14 +65,14 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 1 +#define FANOTIFY_METADATA_VERSION 2 struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __s32 fd; __u64 mask; - __s64 pid; + __s32 fd; + __s32 pid; } __attribute__ ((packed)); struct fanotify_response { -- cgit v1.2.3 From bad849b3dc0fae1297c8d47f846f8d202a6145ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Aug 2010 16:00:34 +0100 Subject: NOMMU: Stub out vm_get_page_prot() if there's no MMU Stub out vm_get_page_prot() if there's no MMU. This was added by commit 804af2cf6e7a ("[AGPGART] remove private page protection map") and is used in commit c07fbfd17e61 ("fbmem: VM_IO set, but not propagated") in the fbmem video driver, but the function doesn't exist on NOMMU, resulting in an undefined symbol at link time.
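A minimal sketch of the call pattern this unbreaks (foo_mmap() is a hypothetical driver hook, modeled on the fbmem usage cited above; it is not part of the patch):

static int foo_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_IO;	/* device memory, keep out of core dumps */
	/* Links on both MMU and NOMMU builds now; on NOMMU the inline
	 * stub below simply returns __pgprot(0). */
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
	return 0;
}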
Signed-off-by: David Howells Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 831c693416b2..e6b1210772ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1363,7 +1363,15 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +#ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); +#else +static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return __pgprot(0); +} +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); -- cgit v1.2.3 From 1da3f87ebb7edb3e0b829ec4bbe5fb3d9d93986f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:24 +0200 Subject: drm: kill kernel_context_switch callbacks Not used by any in-kernel driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 18 ------------------ include/drm/drmP.h | 3 --- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 9bf93bc9a32c..609f2d504f72 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -136,12 +136,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) } } - if (dev->driver->kernel_context_switch && - dev->last_context != lock->context) { - dev->driver->kernel_context_switch(dev, dev->last_context, - lock->context); - } - return 0; } @@ -159,7 +153,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", @@ -169,17 +162,6 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); - /* kernel_context_switch isn't used by any of the x86 drm - * modules but is required by the Sparc driver. - */ - if (dev->driver->kernel_context_switch_unlock) - dev->driver->kernel_context_switch_unlock(dev); - else { - if (drm_lock_free(&master->lock, lock->context)) { - /* FIXME: Should really bail out here. */ - } - } - unblock_all_signals(); return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..15ea8c44f28d 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -703,9 +703,6 @@ struct drm_driver { int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); - int (*kernel_context_switch) (struct drm_device *dev, int old, - int new); - void (*kernel_context_switch_unlock) (struct drm_device *dev); /** * get_vblank_counter - get raw hardware vblank counter -- cgit v1.2.3 From be72ae26b11478c00c64858c86b647b438791671 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:26 +0200 Subject: drm: kill procfs callbacks Not used by any driver (rightly so!). Kill them. 
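The debugfs_init/debugfs_cleanup hooks kept in struct drm_driver (visible in a later drmP.h hunk) cover the same need. A rough sketch, with hypothetical foo_* names and file list:

static int foo_debugfs_init(struct drm_minor *minor)
{
	/* register driver-private files under this minor's debugfs root */
	return drm_debugfs_create_files(foo_debugfs_list, FOO_DEBUGFS_ENTRIES,
					minor->debugfs_root, minor);
}

static void foo_debugfs_cleanup(struct drm_minor *minor)
{
	drm_debugfs_remove_files(foo_debugfs_list, FOO_DEBUGFS_ENTRIES, minor);
}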
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_proc.c | 13 ------------- include/drm/drmP.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c index a9ba6b69ad35..e571de536dc5 100644 --- a/drivers/gpu/drm/drm_proc.c +++ b/drivers/gpu/drm/drm_proc.c @@ -151,7 +151,6 @@ fail: int drm_proc_init(struct drm_minor *minor, int minor_id, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; int ret; @@ -172,14 +171,6 @@ int drm_proc_init(struct drm_minor *minor, int minor_id, return ret; } - if (dev->driver->proc_init) { - ret = dev->driver->proc_init(minor); - if (ret) { - DRM_ERROR("DRM: Driver failed to initialize " - "/proc/dri.\n"); - return ret; - } - } return 0; } @@ -216,15 +207,11 @@ int drm_proc_remove_files(struct drm_info_list *files, int count, */ int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; if (!root || !minor->proc_root) return 0; - if (dev->driver->proc_cleanup) - dev->driver->proc_cleanup(minor); - drm_proc_remove_files(drm_proc_list, DRM_PROC_ENTRIES, minor); sprintf(name, "%d", minor->index); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 15ea8c44f28d..0d7af3f39652 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -792,8 +792,6 @@ struct drm_driver { void (*master_drop)(struct drm_device *dev, struct drm_file *file_priv, bool from_release); - int (*proc_init)(struct drm_minor *minor); - void (*proc_cleanup)(struct drm_minor *minor); int (*debugfs_init)(struct drm_minor *minor); void (*debugfs_cleanup)(struct drm_minor *minor); -- cgit v1.2.3 From 23ddc0243d7313942b94f1a2e44e6394f7bb996e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:25 +0200 Subject: drm: kill dma_ready callbacks Not used by any driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 3 --- include/drm/drmP.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 609f2d504f72..d9146f240d33 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -124,9 +124,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); } - if (dev->driver->dma_ready && (lock->flags & _DRM_LOCK_READY)) - dev->driver->dma_ready(dev); - if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) { if (dev->driver->dma_quiescent(dev)) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 0d7af3f39652..d5a2b8869246 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -699,7 +699,6 @@ struct drm_driver { int (*suspend) (struct drm_device *, pm_message_t state); int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); - void (*dma_ready) (struct drm_device *); int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); -- cgit v1.2.3 From fd2e7931cdefa8e9acf63f0a4efd61ae0f89e77b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:33 +0200 Subject: drm: kill gem_free_object_unlocked driver callback Not used by any current driver. 
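In effect the unlocked free path loses its special case. A sketch of the resulting flow, using the names from the hunk below:

	/* dropping the last reference without holding struct_mutex ... */
	drm_gem_object_unreference_unlocked(obj);
	/* ... now always reaches the driver callback under the lock:
	 *	mutex_lock(&dev->struct_mutex);
	 *	dev->driver->gem_free_object(obj);
	 *	mutex_unlock(&dev->struct_mutex);
	 */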
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 4 +--- include/drm/drmP.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07510df..cff7317d3830 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -474,9 +474,7 @@ drm_gem_object_free_unlocked(struct kref *kref) struct drm_gem_object *obj = (struct drm_gem_object *) kref; struct drm_device *dev = obj->dev; - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { + if (dev->driver->gem_free_object != NULL) { mutex_lock(&dev->struct_mutex); dev->driver->gem_free_object(obj); mutex_unlock(&dev->struct_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d5a2b8869246..6dbdbf45cd1a 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -802,7 +802,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); -- cgit v1.2.3 From b3da8f7d2d1fa81fb65cb3f5d9e50dde40a83182 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:29 +0200 Subject: drm: kill context_ctor callback It's not used by any driver. The destructor callback is unfortunately used by the via driver in a rather convoluted piece of code used to reimplement something resembling broken futexes. I didn't dare to touch this code. But at least kill the needless NULL assignment in the sis driver. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_context.c | 8 -------- drivers/gpu/drm/sis/sis_drv.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 2607753a320b..6d440fb894cf 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -333,14 +333,6 @@ int drm_addctx(struct drm_device *dev, void *data, return -ENOMEM; } - if (ctx->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_ctor) - if (!dev->driver->context_ctor(dev, ctx->handle)) { - DRM_DEBUG("Running out of ctxs or memory.\n"); - return -ENOMEM; - } - } - ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL); if (!ctx_entry) { DRM_DEBUG("out of memory\n"); diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 776bf9e9ea1a..2d1292131500 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -67,7 +67,6 @@ static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR, .load = sis_driver_load, .unload = sis_driver_unload, - .context_dtor = NULL, .dma_quiescent = sis_idle, .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6dbdbf45cd1a..eb4f7edcc314 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -700,7 +700,6 @@ struct drm_driver { int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); int (*dma_quiescent) (struct drm_device *); - int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); /** -- cgit v1.2.3 From a2a273c94357ffd24e635cf9ec9b2e5c6f02b63b
Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:30 +0200 Subject: drm: don't export drm_get_drawable_info Not used by any in-tree user. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drawable.c | 3 +-- include/drm/drmP.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c index c53c9768cc11..170e53178d8b 100644 --- a/drivers/gpu/drm/drm_drawable.c +++ b/drivers/gpu/drm/drm_drawable.c @@ -173,11 +173,10 @@ error: /** * Caller must hold the drawable spinlock! */ -struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) +static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) { return idr_find(&dev->drw_idr, id); } -EXPORT_SYMBOL(drm_get_drawable_info); static int drm_drawable_free(int idr, void *p, void *data) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index eb4f7edcc314..7a5c91c2aa60 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1238,8 +1238,6 @@ extern int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, - drm_drawable_t id); extern void drm_drawable_free_all(struct drm_device *dev); /* Authentication IOCTL support (drm_auth.h) */ -- cgit v1.2.3 From 690bb51b54a986e48c7b8b2dba51a3cd262a7266 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:35 +0200 Subject: drm: drop return value of drm_free_agp No caller (rightly) cares about it, so drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_memory.c | 4 ++-- include/drm/drmP.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 7732268eced2..70ca27edc3c9 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -106,9 +106,9 @@ DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) } /** Wrapper around agp_free_memory() */ -int drm_free_agp(DRM_AGP_MEM * handle, int pages) +void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - return drm_agp_free_memory(handle) ? 0 : -EINVAL; + drm_agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7a5c91c2aa60..7fd1870b6a70 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1178,8 +1178,8 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, int request, int *eof, void *data); extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); +extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); -extern int drm_free_agp(DRM_AGP_MEM * handle, int pages); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, -- cgit v1.2.3 From 793a97e4cc38f834e0488ccc1ecbfe52ff6f5b84 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:27 +0200 Subject: drm: kill drm_map_ofs callbacks All drivers happily copy&pasted the default implementation without checking whether this callback is used at all. It's not. Sigh. Kill it. 
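The boilerplate being removed looked the same in every driver. A representative sketch (foo_driver stands in for the drivers touched below):

static struct drm_driver foo_driver = {
	/* ... */
	.get_map_ofs = drm_core_get_map_ofs,	/* never called by the core */
};

/* and the shared helper it pointed at did nothing but: */
resource_size_t drm_core_get_map_ofs(struct drm_local_map *map)
{
	return map->offset;
}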
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 7 ------- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda67468e603..2fea2e63a313 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,13 +515,6 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_map_ofs(struct drm_local_map * map) -{ - return map->offset; -} - -EXPORT_SYMBOL(drm_core_get_map_ofs); - resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index b4250b2cac1f..084a85c3d5d5 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index a5c66aa82f0c..16352954311c 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 00befce8fbb7..d079f7b86cca 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index 26d0d8ced80d..e9c0cbc21fe2 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 1de5eb53e016..0d64259b21cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = 
nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 1e2971f13aa1..42ec20a10eed 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 795403b0e2cd..8fd89bb8e610 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, @@ -290,7 +289,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index 021de44c15ab..f539996ba230 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 2d1292131500..72b0a74f265f 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index ec5a43e65722..38a562647d6f 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 7a1b210401e0..0b9ad8bbb031 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 72ec2e2b6e97..4a832de43dd5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7fd1870b6a70..70a14a4faa1e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_map_ofs) (struct drm_local_map * map); resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1167,7 +1166,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3 From 4ac5ec40ec70022e4dea8cc6254d2dadd1e43d57 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:34 +0200 Subject: drm: don't export dri1 locking functions Only used by ioctl, not by any in-tree drivers. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 10 +++------- include/drm/drmP.h | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index d9146f240d33..1e28b9072068 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -37,6 +37,8 @@ static int drm_notifier(void *priv); +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); + /** * Lock ioctl. * @@ -172,6 +174,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) * * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. */ +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context) { @@ -208,7 +211,6 @@ int drm_lock_take(struct drm_lock_data *lock_data, } return 0; } -EXPORT_SYMBOL(drm_lock_take); /** * This takes a lock forcibly and hands it to context. 
Should ONLY be used @@ -276,7 +278,6 @@ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context) wake_up_interruptible(&lock_data->lock_queue); return 0; } -EXPORT_SYMBOL(drm_lock_free); /** * If we get here, it means that the process has called DRM_IOCTL_LOCK @@ -339,7 +340,6 @@ void drm_idlelock_take(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_take); void drm_idlelock_release(struct drm_lock_data *lock_data) { @@ -359,8 +359,6 @@ void drm_idlelock_release(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_release); - int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) { @@ -369,5 +367,3 @@ int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && master->lock.file_priv == file_priv); } - -EXPORT_SYMBOL(drm_i_have_hw_lock); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 70a14a4faa1e..45d09639e9d2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1252,7 +1252,6 @@ extern int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); extern int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context); extern void drm_idlelock_take(struct drm_lock_data *lock_data); extern void drm_idlelock_release(struct drm_lock_data *lock_data); -- cgit v1.2.3 From 8f879194f88742d9c452f669482b6d6abdc1e1e7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:31 +0200 Subject: drm: replace drawable ioctl by noops The information supplied by userspace through these ioctls is only accessible by dev->drw_idr. But there's no in-tree user of that. Also userspace does not really care about return values of these ioctls, either. Only hw/xfree86/dri/dri.c from the xserver actually checks the return from adddraw and keeps on trying to create a kernel drawable every time somebody creates a dri drawable. But since that's now a noop, who cares. Therefore it's safe to replace these three ioctls with noops and rip out the implementation.
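For reference, drm_noop, the handler the three drawable ioctls now point at, is (roughly) the trivial accept-and-succeed stub already present in the DRM core:

int drm_noop(struct drm_device *dev, void *data,
	     struct drm_file *file_priv)
{
	DRM_DEBUG("\n");
	return 0;
}

Old userspace keeps getting a success return, which is all it ever checked for.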
Signed-off-by: Daniel Vetter Reviewed-by: Kristian Høgsberg Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_drawable.c | 197 ----------------------------------------- drivers/gpu/drm/drm_drv.c | 8 +- drivers/gpu/drm/drm_stub.c | 3 - include/drm/drmP.h | 15 ---- 5 files changed, 4 insertions(+), 221 deletions(-) delete mode 100644 drivers/gpu/drm/drm_drawable.c (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f3a23a329f4e..997c43d04909 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -5,7 +5,7 @@ ccflags-y := -Iinclude/drm drm-y := drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \ - drm_context.o drm_dma.o drm_drawable.o \ + drm_context.o drm_dma.o \ drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \ drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c deleted file mode 100644 index 170e53178d8b..000000000000 --- a/drivers/gpu/drm/drm_drawable.c +++ /dev/null @@ -1,197 +0,0 @@ -/** - * \file drm_drawable.c - * IOCTLs for drawables - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - * \author Michel Dänzer - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, North Dakota. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "drmP.h" - -/** - * Allocate drawable ID and memory to store information about it. 
- */ -int drm_adddraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - unsigned long irqflags; - struct drm_draw *draw = data; - int new_id = 0; - int ret; - -again: - if (idr_pre_get(&dev->drw_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - ret = idr_get_new_above(&dev->drw_idr, NULL, 1, &new_id); - if (ret == -EAGAIN) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - goto again; - } - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - draw->handle = new_id; - - DRM_DEBUG("%d\n", draw->handle); - - return 0; -} - -/** - * Free drawable ID and memory to store information about it. - */ -int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_draw *draw = data; - unsigned long irqflags; - struct drm_drawable_info *info; - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - info = drm_get_drawable_info(dev, draw->handle); - if (info == NULL) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - return -EINVAL; - } - kfree(info->rects); - kfree(info); - - idr_remove(&dev->drw_idr, draw->handle); - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - DRM_DEBUG("%d\n", draw->handle); - return 0; -} - -int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_update_draw *update = data; - unsigned long irqflags; - struct drm_clip_rect *rects; - struct drm_drawable_info *info; - int err; - - info = idr_find(&dev->drw_idr, update->handle); - if (!info) { - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - if (IS_ERR(idr_replace(&dev->drw_idr, info, update->handle))) { - DRM_ERROR("No such drawable %d\n", update->handle); - kfree(info); - return -EINVAL; - } - } - - switch (update->type) { - case DRM_DRAWABLE_CLIPRECTS: - if (update->num == 0) - rects = NULL; - else if (update->num != info->num_rects) { - rects = kmalloc(update->num * - sizeof(struct drm_clip_rect), - GFP_KERNEL); - } else - rects = info->rects; - - if (update->num && !rects) { - DRM_ERROR("Failed to allocate cliprect memory\n"); - err = -ENOMEM; - goto error; - } - - if (update->num && DRM_COPY_FROM_USER(rects, - (struct drm_clip_rect __user *) - (unsigned long)update->data, - update->num * - sizeof(*rects))) { - DRM_ERROR("Failed to copy cliprects from userspace\n"); - err = -EFAULT; - goto error; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - if (rects != info->rects) { - kfree(info->rects); - } - - info->rects = rects; - info->num_rects = update->num; - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - DRM_DEBUG("Updated %d cliprects for drawable %d\n", - info->num_rects, update->handle); - break; - default: - DRM_ERROR("Invalid update type %d\n", update->type); - return -EINVAL; - } - - return 0; - -error: - if (rects != info->rects) - kfree(rects); - - return err; -} - -/** - * Caller must hold the drawable spinlock! 
- */ -static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) -{ - return idr_find(&dev->drw_idr, id); -} - -static int drm_drawable_free(int idr, void *p, void *data) -{ - struct drm_drawable_info *info = p; - - if (info) { - kfree(info->rects); - kfree(info); - } - - return 0; -} - -void drm_drawable_free_all(struct drm_device *dev) -{ - idr_for_each(&dev->drw_idr, drm_drawable_free, NULL); - idr_remove_all(&dev->drw_idr); -} diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 84da748555bc..a35a41002c33 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -91,8 +91,8 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_resctx, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_adddraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_rmdraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_LOCK, drm_lock, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_unlock, DRM_AUTH), @@ -127,7 +127,7 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_modeset_ctl, 0), - DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_update_drawable_info, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - /* Free drawable information memory */ - drm_drawable_free_all(dev); del_timer(&dev->timer); /* Clear AGP information */ diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index d1ad57450df1..f797ae9da77c 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -240,14 +240,11 @@ int drm_fill_in_dev(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->count_lock); - spin_lock_init(&dev->drw_lock); spin_lock_init(&dev->event_lock); init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); - idr_init(&dev->drw_idr); - if (drm_ht_create(&dev->map_hash, 12)) { return -ENOMEM; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45d09639e9d2..989cefe33c7b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1037,12 +1037,6 @@ struct drm_device { struct drm_minor *control; /**< Control node for card */ struct drm_minor *primary; /**< render type primary screen head */ - /** \name Drawable information */ - /*@{ */ - spinlock_t drw_lock; - struct idr drw_idr; - /*@} */ - struct drm_mode_config mode_config; /**< Current mode config */ /** \name GEM information */ @@ -1229,15 +1223,6 @@ extern int drm_setsareactx(struct drm_device *dev, void *data, extern int drm_getsareactx(struct drm_device *dev, void *data, struct drm_file *file_priv); - /* Drawable IOCTL support (drm_drawable.h) */ -extern int drm_adddraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_rmdraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_update_drawable_info(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern void 
drm_drawable_free_all(struct drm_device *dev); - /* Authentication IOCTL support (drm_auth.h) */ extern int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv); -- cgit v1.2.3 From 89c372647d1d698a96e2189ef4312a977b939839 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:36 +0200 Subject: drm: kill agp indirection mess There's no point in jumping through two indirections. So kill one and call the kernel's agp functions directly. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_agpsupport.c | 40 ++++------------------------------------ drivers/gpu/drm/drm_memory.c | 12 +++--------- include/drm/drmP.h | 5 ----- 3 files changed, 7 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c index ba38e0147220..252fdb98b73a 100644 --- a/drivers/gpu/drm/drm_agpsupport.c +++ b/drivers/gpu/drm/drm_agpsupport.c @@ -193,7 +193,7 @@ int drm_agp_enable_ioctl(struct drm_device *dev, void *data, * \return zero on success or a negative number on failure. * * Verifies the AGP device is present and has been acquired, allocates the - * memory via alloc_agp() and creates a drm_agp_mem entry for it. + * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it. */ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) { @@ -211,7 +211,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; type = (u32) request->type; - if (!(memory = drm_alloc_agp(dev, pages, type))) { + if (!(memory = agp_allocate_memory(dev->agp->bridge, pages, type))) { kfree(entry); return -ENOMEM; } @@ -423,38 +423,6 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev) return head; } -/** Calls agp_allocate_memory() */ -DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data * bridge, - size_t pages, u32 type) -{ - return agp_allocate_memory(bridge, pages, type); -} - -/** Calls agp_free_memory() */ -int drm_agp_free_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return 0; - agp_free_memory(handle); - return 1; -} - -/** Calls agp_bind_memory() */ -int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start) -{ - if (!handle) - return -EINVAL; - return agp_bind_memory(handle, start); -} - -/** Calls agp_unbind_memory() */ -int drm_agp_unbind_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return -EINVAL; - return agp_unbind_memory(handle); -} - /** * Binds a collection of pages into AGP memory at the given offset, returning * the AGP memory structure containing them.
@@ -474,7 +442,7 @@ drm_agp_bind_pages(struct drm_device *dev, DRM_DEBUG("\n"); - mem = drm_agp_allocate_memory(dev->agp->bridge, num_pages, + mem = agp_allocate_memory(dev->agp->bridge, num_pages, type); if (mem == NULL) { DRM_ERROR("Failed to allocate memory for %ld pages\n", @@ -487,7 +455,7 @@ drm_agp_bind_pages(struct drm_device *dev, mem->page_count = num_pages; mem->is_flushed = true; - ret = drm_agp_bind_memory(mem, gtt_offset / PAGE_SIZE); + ret = agp_bind_memory(mem, gtt_offset / PAGE_SIZE); if (ret != 0) { DRM_ERROR("Failed to bind AGP memory: %d\n", ret); agp_free_memory(mem); diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 70ca27edc3c9..c9b805000a11 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -99,29 +99,23 @@ static void *agp_remap(unsigned long offset, unsigned long size, return addr; } -/** Wrapper around agp_allocate_memory() */ -DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) -{ - return drm_agp_allocate_memory(dev->agp->bridge, pages, type); -} - /** Wrapper around agp_free_memory() */ void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - drm_agp_free_memory(handle); + agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); /** Wrapper around agp_bind_memory() */ int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start) { - return drm_agp_bind_memory(handle, start); + return agp_bind_memory(handle, start); } /** Wrapper around agp_unbind_memory() */ int drm_unbind_agp(DRM_AGP_MEM * handle) { - return drm_agp_unbind_memory(handle); + return agp_unbind_memory(handle); } EXPORT_SYMBOL(drm_unbind_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 989cefe33c7b..ffe6035cf471 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1171,7 +1171,6 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); -extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, @@ -1331,10 +1330,6 @@ extern int drm_agp_unbind_ioctl(struct drm_device *dev, void *data, extern int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request); extern int drm_agp_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data *bridge, size_t pages, u32 type); -extern int drm_agp_free_memory(DRM_AGP_MEM * handle); -extern int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start); -extern int drm_agp_unbind_memory(DRM_AGP_MEM * handle); extern void drm_agp_chipset_flush(struct drm_device *dev); /* Stub support (drm_stub.h) */ -- cgit v1.2.3 From df8fcb09667c1b2c9dcf65de23f0bfa851e8138e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:32 +0200 Subject: drm: kill dev->timer Totally unused. 
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 2 -- drivers/gpu/drm/drm_stub.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a35a41002c33..5ff75a3a6b9d 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - del_timer(&dev->timer); - /* Clear AGP information */ if (drm_core_has_AGP(dev) && dev->agp && !drm_core_check_feature(dev, DRIVER_MODESET)) { diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index f797ae9da77c..cdc89ee042cc 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -241,7 +241,6 @@ int drm_fill_in_dev(struct drm_device *dev, spin_lock_init(&dev->count_lock); spin_lock_init(&dev->event_lock); - init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index ffe6035cf471..757b63a23b14 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -964,7 +964,6 @@ struct drm_device { __volatile__ long context_flag; /**< Context swapping flag */ __volatile__ long interrupt_flag; /**< Interruption handler flag */ __volatile__ long dma_flag; /**< DMA dispatch flag */ - struct timer_list timer; /**< Timer for delaying ctx switch */ wait_queue_head_t context_wait; /**< Processes waiting on ctx switch */ int last_checked; /**< Last context checked for DMA */ int last_context; /**< Last current context */ -- cgit v1.2.3 From cbc60ca04b342a4e1f2a1086a7277c077f07dbed Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:28 +0200 Subject: drm: kill get_reg_ofs callback Every driver used the default implementation. Fold that one into the only callsite and drop the callback. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 6 ++---- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 2fea2e63a313..ee879d6bb522 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,7 +515,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) +static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ return dev->hose->dense_mem_base - dev->hose->mem_space->start; @@ -524,8 +524,6 @@ resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) #endif } -EXPORT_SYMBOL(drm_core_get_reg_ofs); - /** * mmap DMA memory. 
* @@ -612,7 +610,7 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) #endif case _DRM_FRAME_BUFFER: case _DRM_REGISTERS: - offset = dev->driver->get_reg_ofs(dev); + offset = drm_core_get_reg_ofs(dev); vma->vm_flags |= VM_IO; /* not in core dump */ vma->vm_page_prot = drm_io_prot(map->type, vma); #if !defined(__arm__) diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index 084a85c3d5d5..1c73b0c43c1e 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index 16352954311c..7140ffc12eee 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, .irq_postinstall = i830_driver_irq_postinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d079f7b86cca..e6afa68775b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index e9c0cbc21fe2..65ea42cf1795 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, .fops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 0d64259b21cc..209912a1b7a5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 42ec20a10eed..67309f84f16d 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, .fops = { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8fd89bb8e610..663cdc10a5c2 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c 
@@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = { @@ -289,7 +288,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index f539996ba230..c0385633667d 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, .fops = { diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 72b0a74f265f..4d9f311d249d 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index 38a562647d6f..e0768adbeccd 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 0b9ad8bbb031..02f733db61c1 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4a832de43dd5..e645f44e4302 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), .dma_quiescent = NULL, /*vmw_dma_quiescent, */ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 757b63a23b14..30e827aeba02 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1159,7 +1158,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int 
drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); /* Memory management support (drm_memory.h) */ -- cgit v1.2.3 From 409456b10f87b28303643fec37543103f9ada00c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 29 Aug 2010 21:57:55 -0700 Subject: net: fix datapath typo Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 6ed43c1f07ab..123959927745 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -76,7 +76,7 @@ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ #define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch - * dapath port */ + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3 From fcaf780b2ad352edaeb1d1c07a6da053266b1eed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Aug 2010 23:22:57 -0300 Subject: i7300_edac: start a driver for i7300 chipset (Clarksboro) Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/i7300_edac.c | 1373 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 19 +- 4 files changed, 1392 insertions(+), 8 deletions(-) create mode 100644 drivers/edac/i7300_edac.c (limited to 'include') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350de996..4573ccc11f93 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -199,6 +199,13 @@ config EDAC_I5100 Support for error detection and correction the Intel San Clemente MCH. +config EDAC_I7300 + tristate "Intel Clarksboro MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Clarksboro MCH (Intel 7300 chipset). + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ca6b1bb24ccc..bca4369d6b7a 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o +obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c new file mode 100644 index 000000000000..eb3f30e96ee3 --- /dev/null +++ b/drivers/edac/i7300_edac.c @@ -0,0 +1,1373 @@ +/* + * Intel 7300 class Memory Controllers kernel module (Clarksboro) + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2010 by: + * Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + * + * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet + * http://www.intel.com/Assets/PDF/datasheet/318082.pdf + * + * TODO: The chipset allows checking for PCI Express errors also. Currently, + * the driver covers only memory errors + * + * This driver uses "csrows" EDAC attribute to represent DIMM slot# + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/mmzone.h> + +#include "edac_core.h" + +/* + * Alter this version for the I7300 module when modifications are made + */ +#define I7300_REVISION " Ver: 1.0.0 " __DATE__ + +#define EDAC_MOD_STR "i7300_edac" + +#define i7300_printk(level, fmt, arg...) \ + edac_printk(level, "i7300", fmt, ##arg) + +#define i7300_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg) + +/* + * Memory topology is organized as: + * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0) + * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0) + * Each channel can have up to 8 DIMM sets (called SLOTS) + * Slots should generally be filled in pairs + * Except on Single Channel mode of operation + * just slot 0/channel0 filled on this mode + * On normal operation mode, the two channels on a branch should be + filled together for the same SLOT# + * When in mirrored mode, Branch 1 replicates memory at Branch 0, so, the four + * channels on both branches should be filled + */ + +/* Limits for i7300 */ +#define MAX_SLOTS 8 +#define MAX_BRANCHES 2 +#define MAX_CH_PER_BRANCH 2 +#define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES) +#define MAX_MIR 3 + +#define to_channel(ch, branch) ((((branch)) << 1) | (ch)) + +#define to_csrow(slot, ch, branch) \ + (to_channel(ch, branch) | ((slot) << 2)) + + +/* Device 16, + * Function 0: System Address (not documented) + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID and + * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), + * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 + * for device 21 (0,1). + */ + /* OFFSETS for Function 0 */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ + /* OFFSETS for Function 1 */ +#define TOLM 0x6C +#define REDMEMB 0x7C + +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 + +#if 0 +#define AMIR0 0x8c +#define AMIR1 0x90 +#define AMIR2 0x94 + +/*TODO: double check it */ +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ + /* Fatal error registers */ +#define FERR_FAT_FBD 0x98 + +/*TODO: double check it */ +#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ + +#define NERR_FAT_FBD 0x9c +#define FERR_NF_FBD 0xa0 + /* Non-fatal error register */ +#define NERR_NF_FBD 0xa4 + /* Enable error mask */ +#define EMASK_FBD 0xa8 + +#define ERR0_FBD 0xac +#define ERR1_FBD 0xb0 +#define ERR2_FBD 0xb4 +#define MCERR_FBD 0xb8 + +#endif + +/* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */ + /* TODO: OFFSETS for Device 16 Function 2 */ +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ + /* OFFSETS for Function 0 */ +/* + * Note: Other Intel EDAC drivers use AMBPRESENT to identify the available + * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it + * seems that we cannot use this information directly for the same usage. + * Each memory slot may have up to 2 AMB interfaces, one for the incoming and another + * for the outgoing interface to the next slot.
+ * For now, the driver just stores the AMB present registers, but rely only at + * the MTR info to detect memory. + * Datasheet is also not clear about how to map each AMBPRESENT registers to + * one of the 4 available channels. + */ +#define AMBPRESENT_0 0x64 +#define AMBPRESENT_1 0x66 + +const static u16 mtr_regs [MAX_SLOTS] = { + 0x80, 0x84, 0x88, 0x8c, + 0x82, 0x86, 0x8a, 0x8e +}; + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8)) +#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7)) +#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4) +#define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DRAM_BANKS_ADDR_BITS 2 +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +#if 0 + /* OFFSETS for Function 1 */ + +/* TODO */ +#define NRECFGLOG 0x74 +#define RECFGLOG 0x78 +#define NRECMEMA 0xbe +#define NRECMEMB 0xc0 +#define NRECFB_DIMMA 0xc4 +#define NRECFB_DIMMB 0xc8 +#define NRECFB_DIMMC 0xcc +#define NRECFB_DIMMD 0xd0 +#define NRECFB_DIMME 0xd4 +#define NRECFB_DIMMF 0xd8 +#define REDMEMA 0xdC +#define RECMEMA 0xf0 +#define RECMEMB 0xf4 +#define RECFB_DIMMA 0xf8 +#define RECFB_DIMMB 0xec +#define RECFB_DIMMC 0xf0 +#define RECFB_DIMMD 0xf4 +#define RECFB_DIMME 0xf8 +#define RECFB_DIMMF 0xfC + +/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ +static inline int extract_fbdchan_indx(u32 x) +{ + return (x>>28) & 0x3; +} +#endif + +#ifdef CONFIG_EDAC_DEBUG +/* MTR NUMROW */ +static const char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "65,536 - 16 rows" +}; + +/* MTR NUMCOL */ +static const char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +#if 0 + +/* + * Error indicator bits and masks + * Error masks are according with Table 5-17 of i7300 datasheet + */ + +enum error_mask { + EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ + EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M3 = 1<<2, /* Reserved */ + EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ + EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M6 = 1<<5, /* Unsupported on i7300 */ + EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ + EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M10 = 1<<9, /* Unsupported on i7300 */ + EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ + EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ + EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ + EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ + EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ + EMASK_M18 = 1<<17, /* Unsupported on i7300 */ + EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ + EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ + EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ 
+ EMASK_M22 = 1<<21, /* SPD protocol Error */ + EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ + EMASK_M24 = 1<<23, /* Refresh error */ + EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ + EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ + EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ + EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ + EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ +}; + +/* + * Names to translate bit error into something useful + */ +static const char *error_name[] = { + [0] = "Memory Write error on non-redundant retry", + [1] = "Memory or FB-DIMM configuration CRC read error", + /* Reserved */ + [3] = "Uncorrectable Data ECC on Replay", + [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M6 Unsupported on i7300 */ + [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [7] = "Aliased Uncorrectable Patrol Data ECC", + [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M10 Unsupported on i7300 */ + [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [11] = "Non-Aliased Uncorrectable Patrol Data ECC", + [12] = "Memory Write error on first attempt", + [13] = "FB-DIMM Configuration Write error on first attempt", + [14] = "Memory or FB-DIMM configuration CRC read error", + [15] = "Channel Failed-Over Occurred", + [16] = "Correctable Non-Mirrored Demand Data ECC", + /* M18 Unsupported on i7300 */ + [18] = "Correctable Resilver- or Spare-Copy Data ECC", + [19] = "Correctable Patrol Data ECC", + [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", + [21] = "SPD protocol Error", + [22] = "Non-Redundant Fast Reset Timeout", + [23] = "Refresh error", + [24] = "Memory Write error on redundant retry", + [25] = "Redundant Fast Reset Timeout", + [26] = "Correctable Counter Threshold Exceeded", + [27] = "DIMM-Spare Copy Completed", + [28] = "DIMM-Isolation Completed", +}; + +/* Fatal errors */ +#define ERROR_FAT_MASK (EMASK_M1 | \ + EMASK_M2 | \ + EMASK_M23) + +/* Correctable errors */ +#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ + EMASK_M20 | \ + EMASK_M19 | \ + EMASK_M18 | \ + EMASK_M17 | \ + EMASK_M16) +#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ + EMASK_M28) +#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) +#define ERROR_NF_NORTH_CRC (EMASK_M21) + +/* Recoverable errors */ +#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ + EMASK_M25 | \ + EMASK_M24 | \ + EMASK_M15 | \ + EMASK_M14 | \ + EMASK_M13 | \ + EMASK_M12 | \ + EMASK_M11 | \ + EMASK_M9 | \ + EMASK_M8 | \ + EMASK_M7 | \ + EMASK_M5) + +/* uncorrectable errors */ +#define ERROR_NF_UNCORRECTABLE (EMASK_M4) + +/* mask to all non-fatal errors */ +#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ + ERROR_NF_UNCORRECTABLE | \ + ERROR_NF_RECOVERABLE | \ + ERROR_NF_DIMM_SPARE | \ + ERROR_NF_SPD_PROTOCOL | \ + ERROR_NF_NORTH_CRC) + +/* + * Define error masks for the several registers + */ + +/* Enable all fatal and non fatal errors */ +#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) + +/* mask for fatal error registers */ +#define FERR_FAT_MASK ERROR_FAT_MASK + +/* masks for non-fatal error register */ +static inline int to_nf_mask(unsigned int mask) +{ + return (mask & EMASK_M29) | (mask >> 3); +}; + +static inline int from_nf_ferr(unsigned int mask) +{ + return (mask & EMASK_M29) | /* Bit 28 */ + (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ +}; + +#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) +#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) +#define FERR_NF_DIMM_SPARE 
to_nf_mask(ERROR_NF_DIMM_SPARE) +#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) +#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) +#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) +#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) + +#endif + +/* Device name and register DID (Device ID) */ +struct i7300_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i7300_dev_info i7300_devs[] = { + { + .ctl_name = "I7300", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + }, +}; + +struct i7300_dimm_info { + int megabytes; /* size, 0 means not present */ +}; + +/* driver private data structure */ +struct i7300_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir[MAX_MIR]; + + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; +}; + +#if 0 +/* I7300 MCH error information retrieved from Hardware */ +struct i7300_error_info { + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a Non-Rec Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; +#endif + +/* FIXME: Why do we need to have this static? */ +static struct edac_pci_ctl_info *i7300_pci; + + +#if 0 +/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and + 5400 better to use an inline function than a macro in this case */ +static inline int nrec_bank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 12) & 0x7; +} +static inline int nrec_rank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 8) & 0xf; +} +static inline int nrec_buf_id(struct i7300_error_info *info) +{ + return ((info->nrecmema)) & 0xff; +} +static inline int nrec_rdwr(struct i7300_error_info *info) +{ + return (info->nrecmemb) >> 31; +} +/* This applies to both NREC and REC string so it can be used with nrec_rdwr + and rec_rdwr */ +static inline const char *rdwr_str(int rdwr) +{ + return rdwr ? 
"Write" : "Read"; +} +static inline int nrec_cas(struct i7300_error_info *info) +{ + return ((info->nrecmemb) >> 16) & 0x1fff; +} +static inline int nrec_ras(struct i7300_error_info *info) +{ + return (info->nrecmemb) & 0xffff; +} +static inline int rec_bank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 12) & 0x7; +} +static inline int rec_rank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 8) & 0xf; +} +static inline int rec_rdwr(struct i7300_error_info *info) +{ + return (info->recmemb) >> 31; +} +static inline int rec_cas(struct i7300_error_info *info) +{ + return ((info->recmemb) >> 16) & 0x1fff; +} +static inline int rec_ras(struct i7300_error_info *info) +{ + return (info->recmemb) & 0xffff; +} + +/* + * i7300_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i7300_get_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + struct i7300_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the + NEXT FATAL error register and the Memory Error Log Register A + */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL and unrecoverable errors, if any + */ +static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + struct i7300_error_info *info, + unsigned long allErrors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + int branch; + int channel; + int bank; + int buf_id; + int rank; + int rdwr; + int ras, cas; + int errnum; + char *type = NULL; + + if (!allErrors) + return; /* if no error, return now */ + + if (allErrors & ERROR_FAT_MASK) + type = "FATAL"; + else if (allErrors & FERR_NF_UNCORRECTABLE) + type = "NON-FATAL 
uncorrected"; + else + type = "NON-FATAL recoverable"; + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + branch = extract_fbdchan_indx(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = nrec_bank(info); + rank = nrec_rank(info); + buf_id = nrec_buf_id(info); + rdwr = nrec_rdwr(info); + ras = nrec_ras(info); + cas = nrec_cas(info); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " + "RAS=%d CAS=%d %s Err=0x%lx (%s))", + type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, + type, allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i7300_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + unsigned long allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + int errnum; + + /* mask off the Error bits that are possible */ + allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { + i7300_proccess_non_recoverable_info(mci, info, allErrors); + return; + } + + /* Correctable errors */ + if (allErrors & ERROR_NF_CORRECTABLE) { + debugf0("\tCorrected bits= 0x%lx\n", allErrors); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = rec_bank(info); + rank = rec_rank(info); + rdwr = rec_rdwr(info); + ras = rec_ras(info); + cas = rec_cas(info); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " + "RAS=%d CAS=%d, CE Err=0x%lx (%s))", + branch >> 1, bank, rdwr_str(rdwr), ras, cas, + allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + + return; + } + + /* Miscelaneous errors */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + i7300_mc_printk(mci, KERN_EMERG, + "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", + branch >> 1, allErrors, error_name[errnum]); +} + +/* + * i7300_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i7300_process_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ u32 allErrors; + + /* First 
handle any fatal errors that occurred */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + i7300_proccess_non_recoverable_info(mci, info, allErrors); + + /* now handle any non-fatal errors that occurred */ + i7300_process_nonfatal_error_info(mci, info); +} + +/* + * i7300_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i7300_clear_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + + i7300_get_error_info(mci, &info); +} + +/* + * i7300_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i7300_check_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i7300_get_error_info(mci, &info); + i7300_process_error_info(mci, &info); +} + +/* + * i7300_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i7300_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} +#endif + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and desired channel + */ +static int decode_mtr(struct i7300_pvt *pvt, + int slot, int ch, int branch, + struct i7300_dimm_info *dinfo, + struct csrow_info *p_csrow) +{ + int mtr, ans, addrBits, channel; + + channel = to_channel(ch, branch); + + mtr = pvt->mtr[slot][branch]; + ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0; + + debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n", + slot, channel, + ans ? "Present" : "NOT Present"); + + /* Determine if there is a DIMM present in this DIMM slot */ + +#if 0 + if (!amb_present || !ans) + return 0; +#else + if (!ans) + return 0; +#endif + + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS; + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + /* add the number of RANK bits */ + addrBits += MTR_DIMM_RANKS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + debugf2("\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? 
"double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); + + p_csrow->grain = 8; + p_csrow->nr_pages = dinfo->megabytes << 8; + p_csrow->mtype = MEM_FB_DDR2; + p_csrow->edac_mode = EDAC_S8ECD8ED; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + return mtr; +} + +/* + * print_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void print_dimm_size(struct i7300_pvt *pvt) +{ + struct i7300_dimm_info *dinfo; + char *p, *mem_buffer; + int space, n; + int channel, slot; + + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < MAX_CHANNELS; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + for (slot = 0; slot < MAX_SLOTS; slot++) { + n = snprintf(p, space, "csrow/SLOT %d ", slot); + p += n; + space -= n; + + for (channel = 0; channel < MAX_CHANNELS; channel++) { + dinfo = &pvt->dimm_info[slot][channel]; + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + kfree(mem_buffer); +} + +/* + * i7300_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. 
+ * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i7300_init_csrows(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + struct i7300_dimm_info *dinfo; + struct csrow_info *p_csrow; + int empty; + int mtr; + int ch, branch, slot, channel; + + pvt = mci->pvt_info; + + empty = 1; /* Assume NO memory */ + + debugf2("Memory Technology Registers:\n"); + + /* Get the AMB present registers for the four channels */ + for (branch = 0; branch < MAX_BRANCHES; branch++) { + /* Read and dump branch 0's MTRs */ + channel = to_channel(0, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + + channel = to_channel(1, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + } + + /* Get the set of MTR[0-7] regs by each branch */ + for (slot = 0; slot < MAX_SLOTS; slot++) { + int where = mtr_regs[slot]; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + pci_read_config_word(pvt->branch_pci[branch], + where, + &pvt->mtr[slot][branch]); + for (ch = 0; ch < MAX_BRANCHES; ch++) { + int channel = to_channel(ch, branch); + + dinfo = &pvt->dimm_info[slot][channel]; + p_csrow = &mci->csrows[slot]; + + mtr = decode_mtr(pvt, slot, ch, branch, + dinfo, p_csrow); + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + p_csrow->csrow_idx = slot; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + slot * 20; + p_csrow->last_page = 9 + slot * 20; + p_csrow->page_mask = 0xfff; + + empty = 0; + } + } + } + + return empty; +} + +static void decode_mir(int mir_no, u16 mir[MAX_MIR]) +{ + if (mir[mir_no] & 3) + debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + mir_no, + (mir[mir_no] >> 4) & 0xfff, + (mir[mir_no] & 1) ? "B0" : "", + (mir[mir_no] & 2) ? 
"B1": ""); +} + +/* + * i7300_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static int i7300_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 actual_tolm; + int i, rc; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) &pvt->ambase); + + debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); + debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); + + /* Decode the MIR regs */ + for (i = 0; i < MAX_MIR; i++) + decode_mir(i, pvt->mir); + + rc = i7300_init_csrows(mci); + if (rc < 0) + return rc; + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + print_dimm_size(pvt); + + return 0; +} + +/* + * i7300_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i7300_put_devices(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + int branch; + + pvt = mci->pvt_info; + + /* Decrement usage count for devices */ + for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++) + pci_dev_put(pvt->branch_pci[branch]); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); +} + +/* + * i7300_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + struct i7300_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR funcs " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + + /* Store device 16 funcs 1 and 2 */ + switch (PCI_FUNC(pdev->devfn)) { + case 1: + pvt->branchmap_werrors = pdev; + break; + case 2: + pvt->fsb_error_regs = pdev; + break; + } + } + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, + NULL); + if (!pvt->branch_pci[0]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0); + goto error; + } + + pvt->branch_pci[1] = 
pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1, + NULL); + if (!pvt->branch_pci[1]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1); + goto error; + } + + return 0; + +error: + i7300_put_devices(mci); + return -ENODEV; +} + +/* + * i7300_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i7300_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i7300_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + if (dev_idx >= ARRAY_SIZE(i7300_devs)) + return -EINVAL; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + num_dimms_per_channel = MAX_SLOTS; + num_channels = MAX_CHANNELS; + num_csrows = MAX_SLOTS * MAX_CHANNELS; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + + /* 'get' the pci devices we want to reserve for our use */ + if (i7300_get_devices(mci, dev_idx)) + goto fail0; + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i7300_edac.c"; + mci->mod_ver = I7300_REVISION; + mci->ctl_name = i7300_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + +#if 0 + /* Set the function pointer to an actual operation function */ + mci->edac_check = i7300_check_error; +#endif + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i7300_get_mc_regs(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i7300_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { +#if 0 + debugf1("MC: Enable error reporting now\n"); + i7300_enable_error_reporting(mci); +#endif + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + +#if 0 + i7300_clear_error(mci); +#endif + + /* allocating generic PCI control info */ + i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i7300_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); 
+ printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i7300_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i7300_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i7300_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i7300_probe1(pdev, id->driver_data); +} + +/* + * i7300_remove_one destructor for one instance of device + * + */ +static void __devexit i7300_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i7300_pci) + edac_pci_release_generic_ctl(i7300_pci); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + /* retrieve references to resources, and free those resources */ + i7300_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); + +/* + * i7300_driver pci_driver structure for this module + * + */ +static struct pci_driver i7300_driver = { + .name = "i7300_edac", + .probe = i7300_init_one, + .remove = __devexit_p(i7300_remove_one), + .id_table = i7300_pci_tbl, +}; + +/* + * i7300_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i7300_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i7300_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i7300_exit() Module exit function + * Unregister the driver + */ +static void __exit i7300_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7300_driver); +} + +module_init(i7300_init); +module_exit(i7300_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_AUTHOR("Red Hat Inc. 
(http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " + I7300_REVISION); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..2eabe311f0d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -815,7 +815,7 @@ #define PCI_VENDOR_ID_ANIGMA 0x1051 #define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 - + #define PCI_VENDOR_ID_EFAR 0x1055 #define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 #define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 @@ -1446,7 +1446,7 @@ #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - + #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 @@ -1600,8 +1600,8 @@ #define PCI_DEVICE_ID_RP8OCTA 0x0005 #define PCI_DEVICE_ID_RP8J 0x0006 #define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 +#define PCI_DEVICE_ID_RP8SNI 0x0008 +#define PCI_DEVICE_ID_RP16SNI 0x0009 #define PCI_DEVICE_ID_RPP4 0x000A #define PCI_DEVICE_ID_RPP8 0x000B #define PCI_DEVICE_ID_RP4M 0x000D @@ -1611,9 +1611,9 @@ #define PCI_DEVICE_ID_URP8INTF 0x0802 #define PCI_DEVICE_ID_URP16INTF 0x0803 #define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C +#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C #define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 +#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 @@ -2139,7 +2139,7 @@ #define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 #define PCI_VENDOR_ID_ZOLTRIX 0x15b0 -#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 @@ -2413,7 +2413,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 #define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e @@ -2616,6 +2616,9 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 -- cgit v1.2.3 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides "user timeout" support as described in RFC 793. The socket option is also needed for the local half of RFC 5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP-level socket option that takes an unsigned int which, when > 0, specifies the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP forcefully closes the corresponding connection and returns ETIMEDOUT to the application. If 0 is given, TCP continues to use the system default. Increasing the user timeout allows a TCP connection to survive extended periods without end-to-end connectivity; decreasing it allows applications to "fail fast" if so desired, where otherwise tearing down a dead connection may take up to 20 minutes with the current system defaults in a normal WAN environment.
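As a usage sketch (an editor's illustration, not part of the original patch; it assumes a connected TCP socket fd, a libc that exposes TCP_USER_TIMEOUT, and an arbitrary 30-second value):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <sys/socket.h>

	/* Abort the connection with ETIMEDOUT if transmitted data stays
	 * unacknowledged for more than 30000 ms; 0 restores the default. */
	static int enable_user_timeout(int fd)
	{
		unsigned int timeout_ms = 30000;

		if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			       &timeout_ms, sizeof(timeout_ms)) < 0) {
			perror("setsockopt(TCP_USER_TIMEOUT)");
			return -1;
		}
		return 0;
	}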
The socket option can be set in any state of a TCP connection, but takes effect only in the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT overrides keepalive in determining when to close a connection due to keepalive failure. The option does not change when TCP retransmits a packet or when a keepalive probe is sent; it only determines when the connection is aborted. This option, like many others, is inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/inet_connection_sock.h | 1 + net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_timer.c | 40 ++++++++++++++++++++++++-------------- 4 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee024590..e64f4c67d0ef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19b..e4f494b42e06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..cf3254528753 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2391,7 +2391,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tp->af_specific->md5_parse(sk, optval, optlen); break; #endif - + case TCP_USER_TIMEOUT: + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. + */ + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; default: err = -ENOPROTOOPT; break; @@ -2610,6 +2615,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: val = tp->thin_dupack; break; + + case TCP_USER_TIMEOUT: + val = jiffies_to_msecs(icsk->icsk_user_timeout); + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..11569deccbea 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * retransmissions with an initial RTO of TCP_RTO_MIN.
*/ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + unsigned int timeout) { - unsigned int timeout, linear_backoff_thresh; - unsigned int start_ts; + unsigned int linear_backoff_thresh, start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + if (likely(timeout == 0)) { + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + } return (tcp_time_stamp - start_ts) >= timeout; } @@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, + (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 : + icsk->icsk_user_timeout)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +439,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { - if (icsk->icsk_probes_out >= keepalive_probes(tp)) { + /* If the TCP_USER_TIMEOUT option is enabled, use that + * to determine when to timeout instead. + */ + if ((icsk->icsk_user_timeout != 0 && + elapsed >= icsk->icsk_user_timeout && + icsk->icsk_probes_out > 0) || + (icsk->icsk_user_timeout == 0 && + icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- include/net/tcp.h | 15 +++++++++++++++ net/dccp/ccids/ccid2.c | 8 ++------ net/ipv4/tcp_input.c | 17 ++--------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..a64022199b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8c95813bcc67..b9c942a09c98 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hc->tx_ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 4341, section 5). */ + hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef6..1bc87a05c734 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC3390. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than use a multiplier in the relevant range. - */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) { - if (tp->mss_cache > 1460) - cwnd = 2; - else - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - } + if (!cwnd) + cwnd = rfc3390_bytes_to_packets(tp->mss_cache); return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. 
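The resulting conversion rule can be sanity-checked in isolation; this is an editor's sketch of the RFC 5681, 3.1 mapping that the diff below adopts, with iw_segments as a hypothetical stand-in for rfc3390_bytes_to_packets:

	/* Initial window in segments as a function of the sender MSS:
	 *   SMSS <= 1095         -> 4 segments
	 *   1095 < SMSS <= 2190  -> 3 segments
	 *   SMSS > 2190          -> 2 segments
	 */
	static inline u32 iw_segments(u32 smss)
	{
		return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
	}

	/* e.g. iw_segments(536) == 4, iw_segments(1460) == 3, iw_segments(9000) == 2 */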
The changes made in RFC 5681 are: a) the setting is now more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" a proposed standard, whereas the newer RFC 5681 is already a draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b62..11dbacc886c0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3 From 4dc89133f49b8cfd77ba7e83f5960aed63aaa99e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 07:40:16 +0000 Subject: net: add a comment on netdev->last_rx As some driver authors seem to reintroduce dev->last_rx use, add a comment to strongly discourage this. Since commit 6cf3f41e6c0 (bonding, net: Move last_rx update into bonding recv logic), network drivers don't need to update last_rx themselves, unless they use this field to implement a timeout. Not updating last_rx avoids dirtying a cache line, improving performance on SMP. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0cf9448a32c4..c82220a9f3d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -953,7 +953,14 @@ struct net_device { /* * Cache line mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Time of last Rx + * This should not be set in + * drivers, unless really needed, + * because network stack (bonding) + * use it if/when necessary, to + * avoid dirtying this cache line. + */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are -- cgit v1.2.3 From a28dec2f26013aad89446b1f708f948617bc28a2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 8 Aug 2010 18:03:33 +0400 Subject: powerpc/85xx: Add P1021 PCI IDs and quirks This is needed for proper PCI-E support on P1021 SoCs.
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 209384b6e039..4ae933225251 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -399,6 +399,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..10d33309e9a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2300,6 +2300,8 @@ #define PCI_DEVICE_ID_P2010 0x0079 #define PCI_DEVICE_ID_P1020E 0x0100 #define PCI_DEVICE_ID_P1020 0x0101 +#define PCI_DEVICE_ID_P1021E 0x0102 +#define PCI_DEVICE_ID_P1021 0x0103 #define PCI_DEVICE_ID_P1011E 0x0108 #define PCI_DEVICE_ID_P1011 0x0109 #define PCI_DEVICE_ID_P1022E 0x0110 -- cgit v1.2.3 From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 1 Sep 2010 08:55:24 -0600 Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared, so the following pops up on PowerPC: cc1: warnings being treated as errors In file included from arch/powerpc/platforms/52xx/mpc52xx_common.c:19: include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared inside parameter list include/linux/of_gpio.h:74: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared inside parameter list make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1 This patch fixes the issue by providing the proper forward declaration. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b78cfa..e41f7dd1ae67 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. -- cgit v1.2.3 From 86cac58b71227cc34a3d0e78f19585c0eff49ea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 18:25:32 +0000 Subject: skge: add GRO support - napi_gro_flush() is exported from net/core/dev.c, to avoid an irq_save/irq_restore in the packet receive path. - use napi_gro_receive() instead of netif_receive_skb() - use napi_gro_flush() before calling __napi_complete() - turn on NETIF_F_GRO by default - Tested on a Marvell 88E8001 Gigabit NIC Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/skge.c | 5 +++-- include/linux/netdevice.h | 1 + net/core/dev.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 40e5c46e7571..a8a63581d63d 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3178,8 +3178,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) skb = skge_rx_get(dev, e, control, rd->status, rd->csum2); if (likely(skb)) { - netif_receive_skb(skb); - + napi_gro_receive(napi, skb); ++work_done; } } @@ -3192,6 +3191,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) if (work_done < to_do) { unsigned long flags; + napi_gro_flush(napi); spin_lock_irqsave(&hw->hw_lock, flags); __napi_complete(napi); hw->intr_mask |= napimask[skge->port]; @@ -3849,6 +3849,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; skge->rx_csum = 1; } + dev->features |= NETIF_F_GRO; /* read the mac address */ memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c82220a9f3d5..af05186d5b36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1702,6 +1702,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); diff --git a/net/core/dev.c b/net/core/dev.c index 63bd20a75929..d8c43e73f0b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3063,7 +3063,7 @@ out: return netif_receive_skb(skb); } -static void napi_gro_flush(struct napi_struct *napi) +inline void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -3076,6 +3076,7 @@ static void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } +EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.3 From c68839963426d42bdb2c915b435f9860d060e645 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Thu, 2 Sep 2010 04:06:24 +0000 Subject: net: Improve comments in include/linux/phy.h Correct stated range of PHY bus addresses (i.e. 0-31) in comment, make spelling of PHY consistent in comments. Signed-off-by: Peter Meerwald Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b0a782c6224..a6e047a04f79 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -116,7 +116,7 @@ struct mii_bus { /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; - /* Phy addresses to be ignored when probing */ + /* PHY addresses to be ignored when probing */ u32 phy_mask; /* @@ -283,7 +283,7 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-32) */ + /* Bus address of the PHY (0-31) */ int addr; /* -- cgit v1.2.3 From bc8acf2c8c3e43fcc192762a9f964b3e9a17748b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Sep 2010 13:07:41 -0700 Subject: drivers/net: avoid some skb->ip_summed initializations fresh skbs have ip_summed set to CHECKSUM_NONE (0) We can avoid setting again skb->ip_summed to CHECKSUM_NONE in drivers. Introduce skb_checksum_none_assert() helper so that we keep this assertion documented in driver sources. Change most occurrences of : skb->ip_summed = CHECKSUM_NONE; by : skb_checksum_none_assert(skb); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/acenic.c | 2 +- drivers/net/atl1c/atl1c_main.c | 2 +- drivers/net/atl1e/atl1e_main.c | 2 +- drivers/net/atlx/atl1.c | 2 +- drivers/net/b44.c | 2 +- drivers/net/benet/be_main.c | 2 +- drivers/net/bna/bnad.c | 2 +- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/cassini.c | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/cpmac.c | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/cxgb4/sge.c | 2 +- drivers/net/cxgb4vf/sge.c | 2 +- drivers/net/dm9000.c | 2 +- drivers/net/e1000/e1000_main.c | 3 ++- drivers/net/e1000e/netdev.c | 3 ++- drivers/net/gianfar.c | 2 +- drivers/net/greth.c | 2 +- drivers/net/ibmlana.c | 2 +- drivers/net/igb/igb_main.c | 2 +- drivers/net/igbvf/netdev.c | 2 +- drivers/net/ipg.c | 6 +++--- drivers/net/iseries_veth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 4 ++-- drivers/net/ixgbe/ixgbe_fcoe.c | 5 +++-- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/jme.c | 2 +- drivers/net/ll_temac_main.c | 2 +- drivers/net/macb.c | 2 +- drivers/net/niu.c | 2 +- drivers/net/ns83820.c | 2 +- drivers/net/pasemi_mac.c | 2 +- drivers/net/ps3_gelic_net.c | 4 ++-- drivers/net/qla3xxx.c | 4 ++-- drivers/net/qlcnic/qlcnic_init.c | 2 +- drivers/net/qlge/qlge_main.c | 6 +++--- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 4 ++-- drivers/net/sb1250-mac.c | 2 +- drivers/net/sfc/rx.c | 2 +- drivers/net/sh_eth.c | 2 +- drivers/net/smsc911x.c | 2 +- drivers/net/spider_net.c | 4 ++-- drivers/net/stmmac/stmmac_main.c | 2 +- drivers/net/tehuti.c | 5 +++-- drivers/net/tg3.c | 2 +- drivers/net/typhoon.c | 2 +- drivers/net/via-velocity.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/vxge/vxge-main.c | 2 +- drivers/net/xilinx_emaclite.c | 2 +- include/linux/skbuff.h | 15 +++++++++++++++ 56 files changed, 86 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 4a4f6b81e32d..237d4ea5a416 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -561,7 +561,7 @@ rx_status_loop: if (cp_rx_csum_ok(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 
b9a591604e5b..41d9911202d0 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2033,7 +2033,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) skb->csum = htons(csum); skb->ip_summed = CHECKSUM_COMPLETE; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } /* send it up */ diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 61f1634ab3f8..553230eb365c 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1719,7 +1719,7 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, * cannot figure out if the packet is fragmented or not, * so we tell the KERNEL CHECKSUM_NONE */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, const int ringid) diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 1ae44bb26317..56ace3fbe40d 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1331,7 +1331,7 @@ static inline void atl1e_rx_checksum(struct atl1e_adapter *adapter, u16 pkt_flags; u16 err_flags; - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); pkt_flags = prrs->pkt_flag; err_flags = prrs->err_flag; if (((pkt_flags & RRS_IS_IPV4) || (pkt_flags & RRS_IS_IPV6)) && diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 5837b0184d4b..e1e0171d6e62 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1805,7 +1805,7 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter, * the higher layers and let it be sorted out there. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(rrd->pkt_flg & PACKET_FLAG_ERR)) { if (rrd->err_flg & (ERR_FLAG_CRC | ERR_FLAG_TRUNC | diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 37617abc1647..7342308718a0 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -818,7 +818,7 @@ static int b44_rx(struct b44 *bp, int budget) copy_skb->data, len); skb = copy_skb; } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, bp->dev); netif_receive_skb(skb); received++; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 9db10fec8235..dcee7a20c0b9 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1016,7 +1016,7 @@ static void be_rx_compl_process(struct be_adapter *adapter, skb_fill_rx_data(adapter, skb, rxcp, num_rcvd); if (do_pkt_csum(rxcp, adapter->rx_csum)) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c index 79c4c2441449..44adc7aefddc 100644 --- a/drivers/net/bna/bnad.c +++ b/drivers/net/bna/bnad.c @@ -510,7 +510,7 @@ bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) (flags & BNA_CQ_EF_L4_CKSUM_OK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += skb->len; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index a0e02aa42214..4ff76e38e788 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3218,7 +3218,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum && (status & (L2_FHDR_STATUS_TCP_SEGMENT | L2_FHDR_STATUS_UDP_DATAGRAM))) { diff --git 
a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index da96d1a18c20..0e4caf411905 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -623,7 +623,7 @@ reuse_rx: /* Set Toeplitz hash for a none-LRO skb */ bnx2x_set_skb_rxhash(bp, cqe, skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum) { if (likely(BNX2X_RX_CSUM_OK(cqe))) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 28c88eeec757..32aaadc4734f 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2149,7 +2149,7 @@ end_copy_pkt: skb->csum = csum_unfold(~csum); skb->ip_summed = CHECKSUM_COMPLETE; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return len; } diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index f01cfdb995de..1950b9a20ecd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1388,7 +1388,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) ++st->rx_cso_good; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(adapter->vlan_grp && p->vlan_valid)) { st->vlan_xtract++; diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index 5a5af1ca7541..fec939f8f65f 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -391,7 +391,7 @@ static struct sk_buff *cpmac_rx_one(struct cpmac_priv *priv, if (likely(skb)) { skb_put(desc->skb, desc->datalen); desc->skb->protocol = eth_type_trans(desc->skb, priv->dev); - desc->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(desc->skb); priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += desc->datalen; result = desc->skb; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 8ff96c6f6de5..c5a142bea5e9 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -2022,7 +2022,7 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_record_rx_queue(skb, qs - &adap->sge.qs[0]); if (unlikely(p->vlan_valid)) { diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c index 6ddb3bb0ce67..9967f3debce7 100644 --- a/drivers/net/cxgb4/sge.c +++ b/drivers/net/cxgb4/sge.c @@ -1605,7 +1605,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, rxq->stats.rx_cso++; } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/cxgb4vf/sge.c b/drivers/net/cxgb4vf/sge.c index e00fd9c36e8b..f10864ddafbe 100644 --- a/drivers/net/cxgb4vf/sge.c +++ b/drivers/net/cxgb4vf/sge.c @@ -1534,7 +1534,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, } rxq->stats.rx_cso++; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 4fd6b2b4554b..9f6aeefa06bf 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1056,7 +1056,7 @@ dm9000_rx(struct net_device *dev) if ((((rxbyte & 0x1c) << 3) & rxbyte) == 0) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } netif_rx(skb); dev->stats.rx_packets++; diff --git a/drivers/net/e1000/e1000_main.c 
b/drivers/net/e1000/e1000_main.c index 3e8ac4baae1b..17f5867b5d9b 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3552,7 +3552,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, struct e1000_hw *hw = &adapter->hw; u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* 82543 or newer only */ if (unlikely(hw->mac_type < e1000_82543)) return; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 5f3eac6432cb..c9b66f4727e4 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -475,7 +475,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, { u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index e6048d6ab0ea..f30adbf86bb2 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2654,7 +2654,7 @@ static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb) if ((fcb->flags & RXFCB_CSUM_MASK) == (RXFCB_CIP | RXFCB_CTU)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } diff --git a/drivers/net/greth.c b/drivers/net/greth.c index fbeaf70d1727..27d6960ce09e 100644 --- a/drivers/net/greth.c +++ b/drivers/net/greth.c @@ -893,7 +893,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit) if (greth->flags & GRETH_FLAG_RX_CSUM && hw_checksummed(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, dev); dev->stats.rx_packets++; diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 294ccfb427cf..0037a696cd0a 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -602,7 +602,7 @@ static void irqrx_handler(struct net_device *dev) /* set up skb fields */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* bookkeeping */ dev->stats.rx_packets++; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index d35cc38bf8b2..c4d861b557ca 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -5455,7 +5455,7 @@ static void igb_receive_skb(struct igb_q_vector *q_vector, static inline void igb_rx_checksum_adv(struct igb_ring *ring, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) || diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index c539f7c9c3e0..c7fab80d2490 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -103,7 +103,7 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter, static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if ((status_err & E1000_RXD_STAT_IXSM) || diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 72e3d2da9e9f..dc0198092343 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1213,7 +1213,7 @@ static void 
ipg_nic_rx_with_start_and_end(struct net_device *dev, skb_put(skb, framelen); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); sp->rx_buff[entry] = NULL; } @@ -1278,7 +1278,7 @@ static void ipg_nic_rx_with_end(struct net_device *dev, jumbo->skb->protocol = eth_type_trans(jumbo->skb, dev); - jumbo->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(jumbo->skb); netif_rx(jumbo->skb); } } @@ -1476,7 +1476,7 @@ static int ipg_nic_rx(struct net_device *dev) * IP/TCP/UDP frame was received. Let the * upper layer decide. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Hand off frame for higher layer processing. * The function netif_rx() releases the sk_buff diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index ba1de5973fb2..8df645e78f2e 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1524,7 +1524,7 @@ static void veth_receive(struct veth_lpar_connection *cnx, skb_put(skb, length); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); /* send it up */ dev->stats.rx_packets++; dev->stats.rx_bytes += length; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 33c4ffe6e103..c2f6e71e1181 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1905,7 +1905,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, */ if ((rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) || (!(rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS))) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return; } @@ -1913,7 +1913,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, /* now look at the TCP checksum error bit */ if (rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE) { /* let the stack verify checksum errors */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); adapter->hw_csum_rx_error++; } else { /* TCP checksum is good */ diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 86fa07cb061d..2f1de8b90f9e 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -304,12 +304,13 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, if (!ixgbe_rx_is_fcoe(rx_desc)) goto ddp_out; - skb->ip_summed = CHECKSUM_UNNECESSARY; sterr = le32_to_cpu(rx_desc->wb.upper.status_error); fcerr = (sterr & IXGBE_RXDADV_ERR_FCERR); fceofe = (sterr & IXGBE_RXDADV_ERR_FCEOFE); if (fcerr == IXGBE_FCERR_BADCRC) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) fh = (struct fc_frame_header *)(skb->data + diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 5e4dc1b0a1bd..3aafe94741ba 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -980,7 +980,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter, { u32 status_err = le32_to_cpu(rx_desc->wb.upper.status_error); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 5d3c869283a5..bdbd26c60ae6 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -356,7 +356,7 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, static inline void 
ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 99f24f5cac53..1fcd533b1a61 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -936,7 +936,7 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) if (jme_rxsum_ok(jme, le16_to_cpu(rxdesc->descwb.flags))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_TAGON)) { if (jme->vlgrp) { diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c index bdf2149e5296..874ee01e8d9d 100644 --- a/drivers/net/ll_temac_main.c +++ b/drivers/net/ll_temac_main.c @@ -760,7 +760,7 @@ static void ll_temac_recv(struct net_device *ndev) skb_put(skb, length); skb->dev = ndev; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* if we're doing rx csum offload, set it up */ if (((lp->temac_features & TEMAC_FEATURE_RX_CSUM) != 0) && diff --git a/drivers/net/macb.c b/drivers/net/macb.c index ff2f158ab0b9..4297f6e8c4bc 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -407,7 +407,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, } skb_reserve(skb, RX_OFFSET); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); for (frag = first_frag; ; frag = NEXT_RX(frag)) { diff --git a/drivers/net/niu.c b/drivers/net/niu.c index b4cc61f1fc59..1f89f472cbfb 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -3484,7 +3484,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, RCR_ENTRY_ERROR))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else if (!(val & RCR_ENTRY_MULTI)) append_size = len - skb->len; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 447c2c43769a..4475ca97f031 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -923,7 +923,7 @@ static void rx_irq(struct net_device *ndev) if ((extsts & 0x002a0000) && !(extsts & 0x00540000)) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->protocol = eth_type_trans(skb, ndev); #ifdef NS83820_VLAN_ACCEL_SUPPORT diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 8ab6ae0a6107..828e97cacdbf 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -808,7 +808,7 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, skb->csum = (macrx & XCT_MACRX_CSUM_M) >> XCT_MACRX_CSUM_S; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); packets++; tot_bytes += len; diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c index 87d6b8f36304..5526ab4895e6 100644 --- a/drivers/net/ps3_gelic_net.c +++ b/drivers/net/ps3_gelic_net.c @@ -956,9 +956,9 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, (!(data_error & GELIC_DESCR_DATA_ERROR_CHK_MASK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* update netdevice statistics */ netdev->stats.rx_packets++; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 
6168a130f33f..7496ed2c34ab 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2029,7 +2029,7 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev, dma_unmap_len(lrg_buf_cb2, maplen), PCI_DMA_FROMDEVICE); prefetch(skb->data); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, qdev->ndev); netif_receive_skb(skb); @@ -2076,7 +2076,7 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, PCI_DMA_FROMDEVICE); prefetch(skb2->data); - skb2->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb2); if (qdev->device_id == QL3022_DEVICE_ID) { /* * Copy the ethhdr from first buffer to second. This diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 8e47d7aea562..26a7d6bca5c7 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1369,7 +1369,7 @@ static struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *adapter, adapter->stats.csummed++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->dev = adapter->netdev; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 5a245211fc53..e2d0e108b9aa 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -1566,7 +1566,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (qdev->rx_csum && !(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) { @@ -1676,7 +1676,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. @@ -1996,7 +1996,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, } skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 078bbf4e6f19..07b3fb5175e5 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -4460,7 +4460,7 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, struct RxDesc *desc) ((status == RxProtoIP) && !(opts1 & IPFail))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff, diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 7061fc8e99c7..c70ad515383a 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -7603,10 +7603,10 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) * Packet with erroneous checksum, let the * upper layers deal with it. 
*/ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); swstats->mem_freed += skb->truesize; send_up: diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 8e6bd45b9f31..d8249d7653c6 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -1170,7 +1170,7 @@ again: sb->ip_summed = CHECKSUM_UNNECESSARY; /* don't need to set sb->csum */ } else { - sb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(sb); } } prefetch(sb->data); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 799c461ce7b8..acb372e841b2 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -615,7 +615,7 @@ void __efx_rx_packet(struct efx_channel *channel, EFX_BUG_ON_PARANOID(!skb); /* Set the SKB flags */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Pass the packet up */ netif_receive_skb(skb); diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index a812efc3632e..50259dfec583 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -798,7 +798,7 @@ static int sh_eth_rx(struct net_device *ndev) skb->dev = ndev; sh_eth_set_receive_align(skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rxdesc->addr = virt_to_phys(PTR_ALIGN(skb->data, 4)); } if (entry >= RX_RING_SIZE - 1) diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 0909ae934ad0..13ddcd487200 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -1048,7 +1048,7 @@ static int smsc911x_poll(struct napi_struct *napi, int budget) smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, pktwords); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); /* Update counters */ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 1636a34d95dd..cb6bcca9d541 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -1000,9 +1000,9 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, !(data_error & SPIDER_NET_DATA_ERR_CKSUM_MASK)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (data_status & SPIDER_NET_VLAN_PACKET) { /* further enhancements: HW-accel VLAN diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index e3f002eba89a..1ccea76d89ae 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1256,7 +1256,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) if (unlikely(status == csum_none)) { /* always for the old mac 10/100 */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 3128d6a8e9ce..8b3dc1eb4015 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1297,12 +1297,13 @@ static int bdx_rx_receive(struct bdx_priv *priv, struct rxd_fifo *f, int budget) ndev->stats.rx_bytes += len; skb_put(skb, len); - skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, ndev); /* Non-IP packets aren't checksum-offloaded */ if (GET_RXD_PKT_ID(rxd_val1) == 0) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; NETIF_RX_MUX(priv, rxd_val1, rxd_vlan, skb); diff --git a/drivers/net/tg3.c 
b/drivers/net/tg3.c index bc3af78a869f..9f6ffffc8376 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4719,7 +4719,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) >> RXD_TCPCSUM_SHIFT) == 0xffff)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, tp->dev); diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 3f4681f78262..5dfb39539b3e 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -1760,7 +1760,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_UDP_CHK_GOOD)) { new_skb->ip_summed = CHECKSUM_UNNECESSARY; } else - new_skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(new_skb); spin_lock(&tp->state_lock); if(tp->vlgrp != NULL && rx->rxStatus & TYPHOON_RX_VLAN) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index fd69095ef6e3..ed7f4f5c4062 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1954,7 +1954,7 @@ static int velocity_tx_srv(struct velocity_info *vptr) */ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rd->rdesc1.CSM & CSM_IPKT) { if (rd->rdesc1.CSM & CSM_IPOK) { diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index abe0ff53daf3..198ce92af0c3 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1042,11 +1042,11 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, skb->csum = htons(gdesc->rcd.csum); skb->ip_summed = CHECKSUM_PARTIAL; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 01cdec712b64..5378b849f54f 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -501,7 +501,7 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr, ext_info.l4_cksum == VXGE_HW_L4_CKSUM_OK) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); vxge_rx_complete(ring, skb, ext_info.vlan, pkt_length, &ext_info); diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c index 71122ee4e830..f3f8be5a35fa 100644 --- a/drivers/net/xilinx_emaclite.c +++ b/drivers/net/xilinx_emaclite.c @@ -641,7 +641,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_put(skb, len); /* Tell the skb how much data we got */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f900ffcd847e..9e8085a89589 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2206,6 +2206,21 @@ static inline void skb_forward_csum(struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; } +/** + * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE + * @skb: skb to check + * + * fresh skbs have their ip_summed set to CHECKSUM_NONE. + * Instead of forcing ip_summed to CHECKSUM_NONE, we can + * use this helper, to document places where we make this assertion. 
+ */ +static inline void skb_checksum_none_assert(struct sk_buff *skb) +{ +#ifdef DEBUG + BUG_ON(skb->ip_summed != CHECKSUM_NONE); +#endif +} + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Sep 2010 15:48:16 -0700 Subject: mutex: Fix annotations to include it in kernel-locking docbook Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c, then add these 2 files to the kernel-locking docbook as the Mutex API reference chapter. Add one API function to mutex-design.txt and correct a typo in that file. Signed-off-by: Randy Dunlap Cc: Rusty Russell LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 6 ++++++ Documentation/mutex-design.txt | 3 ++- include/linux/mutex.h | 8 ++++++++ kernel/mutex.c | 23 +++++++---------------- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 0b1a3f97f285..a0d479d1e1dd 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1961,6 +1961,12 @@ machines due to caching. + + Mutex API reference +!Iinclude/linux/mutex.h +!Ekernel/mutex.c + + Further reading diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index c91ccc0720fa..38c10fd7f411 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler mutex semantics are sufficient for your code, then there are a couple of advantages of mutexes: - - 'struct mutex' is smaller on most architectures: .e.g on x86, + - 'struct mutex' is smaller on most architectures: E.g. on x86, 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. A smaller structure size means less RAM footprint, and better CPU-cache utilization. @@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined: void mutex_lock_nested(struct mutex *lock, unsigned int subclass); int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4f5fcc..f363bc8fdc74 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ diff --git a/kernel/mutex.c b/kernel/mutex.c index 4c0b7b3e6d2e..200407c1502f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -36,15 +36,6 @@ # include #endif -/*** - * mutex_init - initialize the mutex - * @lock: the mutex to be initialized - * @key: the lock_class_key for the class; used by mutex lock debugging - * - * Initialize the mutex to unlocked state. - * - * It is not allowed to initialize an already locked mutex. 
- */ void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_lock - acquire the mutex * @lock: the mutex to be acquired * @@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_unlock - release the mutex * @lock: the mutex to be released * @@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count); static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); -/*** - * mutex_lock_interruptible - acquire the mutex, interruptable +/** + * mutex_lock_interruptible - acquire the mutex, interruptible * @lock: the mutex to be acquired * * Lock the mutex like mutex_lock(), and return 0 if the mutex has @@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) return prev == 1; } -/*** - * mutex_trylock - try acquire the mutex, without waiting +/** + * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. * * NOTE: this function follows the spin_trylock() convention, so - * it is negated to the down_trylock() return values! Be careful + * it is negated from the down_trylock() return values! Be careful * about this when converting semaphore users to mutexes. * * This function must not be used in interrupt context. The -- cgit v1.2.3 From 57a2ce5f54f3120467be760662c6ef3bea3f9579 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Sep 2010 19:09:46 +0800 Subject: padata: add missing __percpu markup in include/linux/padata.h parallel_data->pqueue and parallel_data->squeue are percpu pointers but were missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/linux/padata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index bdcd1e9eacea..4633b2f726b6 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -127,8 +127,8 @@ struct padata_cpumask { */ struct parallel_data { struct padata_instance *pinst; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; + struct padata_parallel_queue __percpu *pqueue; + struct padata_serial_queue __percpu *squeue; atomic_t seq_nr; atomic_t reorder_objects; atomic_t refcnt; -- cgit v1.2.3 From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Sep 2010 15:42:43 +0000 Subject: cls_cgroup: Fix rcu lockdep warning Dave reported an rcu lockdep warning on a 2.6.35.4 kernel. task->cgroups and task->cgroups->subsys[i] are protected by RCU, so we avoid accessing invalid pointers here. This might happen, for example, when you are dereferencing those pointers while someone moves @task from one cgroup to another. Reported-by: Dave Jones Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- include/net/cls_cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..ef6c24a529e1 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; -- cgit v1.2.3 From 71cad0554956de87c3fc413b1eac9313887eb14f Mon Sep 17 00:00:00 2001 From: Philippe Langlais Date: Tue, 31 Aug 2010 14:19:09 +0200 Subject: serial: fix port type conflict between NS16550A & U6_16550A Bug seen by Dr. David Alan Gilbert with sparse Signed-off-by: Philippe Langlais Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 3 +-- include/linux/serial_core.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ebc694a6d52..ef914061511e 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -77,8 +77,7 @@ struct serial_struct { #define PORT_16654 11 #define PORT_16850 12 #define PORT_RSA 13 /* RSA-DV II/S card */ -#define PORT_U6_16550A 14 -#define PORT_MAX 14 +#define PORT_MAX 13 #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64458a9a8938..563e23400913 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -44,7 +44,8 @@ #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ -#define PORT_MAX_8250 18 /* max port ID */ +#define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ +#define PORT_MAX_8250 19 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From 29bc17ecb856ffb2b47c7009a71971c6f9334205 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 4 Sep 2010 22:56:44 +0200 Subject: io-mapping: Fix the address space annotations Fixes a bunch of sparse warnings in io-mapping.h because of the inconsistent __iomem usage. 
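[Background sketch, not part of the patch: sparse treats __iomem as a distinct address space, so mixing plain and __iomem pointers without an explicit __force cast produces exactly the warnings being fixed here. A hypothetical example; the base address and mapping size are made up:

#include <linux/io.h>

static u32 read_first_reg(resource_size_t base)
{
	void __iomem *regs = ioremap(base, 4096);	/* pointer lives in the __iomem space */
	u32 v;

	if (!regs)
		return 0;
	v = readl(regs);	/* ok: readl() expects a const volatile void __iomem * */
	/* u32 *p = (u32 *)regs; would warn: "cast removes address space of expression" */
	iounmap(regs);
	return v;
}
]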
Signed-off-by: Francisco Jerez LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net> Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5c490c..7fb592793738 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } -- cgit v1.2.3 From 73457f0f836956747e0394320be2163c050e96ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 6 Aug 2010 01:59:14 +0300 Subject: cgroups: fix API thinko The cgroup_attach_task_current_cg API that we have upstream is backwards: we really need an API to attach the cgroups from another process A to the current one. In our case (vhost), a privileged user wants to attach its task to cgroups from a less privileged one; the API makes us run it in the other task's context, and this fails. So let's make the API generic and just pass in 'from' and 'to' tasks. Add an inline wrapper for cgroup_attach_task_current_cg to avoid breaking bisect. Signed-off-by: Michael S. 
Tsirkin Acked-by: Li Zefan Acked-by: Paul Menage --- include/linux/cgroup.h | 11 ++++++++++- kernel/cgroup.c | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..5a53d8f039a2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,11 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +640,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a8ce09954404..9d90c08f3bde 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1789,10 +1789,11 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task + * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; struct cgroup *cur_cg; @@ -1800,7 +1801,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); + cur_cg = task_cgroup_from_root(from, root); retval = cgroup_attach_task(cur_cg, tsk); if (retval) break; @@ -1809,7 +1810,7 @@ return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3 From b8ef3204f460912a46659cdc74d237adbe705053 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 6 Aug 2010 03:02:37 -0500 Subject: [SCSI] fc class: add fc host default dev loss setting This patch adds a fc_host setting to store the default dev_loss_tmo. It is used if the driver has a callback to get the value from the LLD. If the callback is not set, then we use the fc class module default value. 
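[Hypothetical usage sketch, not part of the patch: an LLD opts in by implementing the new callback in its fc_function_template and writing the value through the fc_host_def_dev_loss_tmo() accessor; every name here other than those two is invented:

static void example_get_host_def_dev_loss_tmo(struct Scsi_Host *shost)
{
	/* example_read_fw_dev_loss() stands in for however the LLD
	 * obtains its firmware/hardware default, in seconds */
	fc_host_def_dev_loss_tmo(shost) = example_read_fw_dev_loss(shost);
}

static struct fc_function_template example_fc_template = {
	/* ... other transport callbacks and attribute flags ... */
	.get_host_def_dev_loss_tmo = example_get_host_def_dev_loss_tmo,
};
]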
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 6 +++++- include/scsi/scsi_transport_fc.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index d7e470a06180..9f0f7d9c7422 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -2525,7 +2525,11 @@ fc_rport_create(struct Scsi_Host *shost, int channel, rport->maxframe_size = -1; rport->supported_classes = FC_COS_UNSPECIFIED; - rport->dev_loss_tmo = fc_dev_loss_tmo; + if (fci->f->get_host_def_dev_loss_tmo) { + fci->f->get_host_def_dev_loss_tmo(shost); + rport->dev_loss_tmo = fc_host_def_dev_loss_tmo(shost); + } else + rport->dev_loss_tmo = fc_dev_loss_tmo; memcpy(&rport->node_name, &ids->node_name, sizeof(rport->node_name)); memcpy(&rport->port_name, &ids->port_name, sizeof(rport->port_name)); rport->port_id = ids->port_id; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 87d81b3ce564..9f98fca9b763 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,6 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; + u32 def_dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -580,6 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) +#define fc_host_def_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->def_dev_loss_tmo) struct fc_bsg_buffer { @@ -640,6 +643,7 @@ struct fc_function_template { void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); void (*set_host_system_hostname)(struct Scsi_Host *); + void (*get_host_def_dev_loss_tmo)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); -- cgit v1.2.3 From 144c0f8833d0458e4369a27a53aea8856c665c41 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 3 Sep 2010 10:31:05 -0700 Subject: Input: fix a few typos Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 6 +++--- include/linux/input.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/input/input.c b/drivers/input/input.c index ab6982056518..acb3c8095c65 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -171,7 +171,7 @@ static int input_handle_abs_event(struct input_dev *dev, if (code == ABS_MT_SLOT) { /* * "Stage" the event; we'll flush it later, when we - * get actiual touch data. + * get actual touch data. */ if (*pval >= 0 && *pval < dev->mtsize) dev->slot = *pval; @@ -188,7 +188,7 @@ static int input_handle_abs_event(struct input_dev *dev, pold = &mtslot->abs[code - ABS_MT_FIRST]; } else { /* - * Bypass filtering for multitouch events when + * Bypass filtering for multi-touch events when * not employing slots. */ pold = NULL; @@ -1601,7 +1601,7 @@ EXPORT_SYMBOL(input_free_device); * * This function allocates all necessary memory for MT slot handling in the * input device, and adds ABS_MT_SLOT to the device capabilities. All slots - * are initially marked as unused iby setting ABS_MT_TRACKING_ID to -1. + * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1. 
*/ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots) { diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..789265123531 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -67,7 +67,7 @@ struct input_absinfo { #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ #define EVIOCGUNIQ(len) _IOC(_IOC_READ, 'E', 0x08, len) /* get unique identifier */ -#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ +#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global key state */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -- cgit v1.2.3 From fe8e0c25cad28e8858ecfa5863333c70685a6811 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 6 Sep 2010 20:53:42 +0200 Subject: x86, 32-bit: Align percpu area and irq stacks to THREAD_SIZE The irq stacks, located in the percpu-area, need to be THREAD_SIZE aligned. Add the infrastructure to align percpu variables to larger-than-pagesize amounts within the percpu area, and use it to specify the alignment for the irq stacks. Also align the percpu area itself to THREAD_SIZE. This should make irq stacks work with 8K THREAD_SIZE. Signed-off-by: Alexander van Heukelum Cc: Tejun Heo Cc: hch@lst.de LKML-Reference: <1283799222.15941.1393621887@webmail.messagingengine.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 2 +- include/linux/percpu-defs.h | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b5609f54c4b..50fbbe60e507 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -60,8 +60,8 @@ union irq_ctx { static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE); static void call_on_stack(void *func, void *stack) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..bb899475355d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -273,7 +273,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(PAGE_SIZE) + PERCPU(THREAD_SIZE) #endif . = ALIGN(PAGE_SIZE); diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..ab20d119a85d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,18 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for large per-CPU variables that must be + * aligned to something larger than the pagesize. + */ +#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + /* * Intermodule exports for per-CPU variables. 
sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 831853c87fb7234a8650484d30993242ea9ad6d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Sep 2010 16:08:56 +0100 Subject: ALSA: Add more jack button slots Some devices have more flexible microphone detection and can detect a wider range of buttons. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/jack.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/jack.h b/include/sound/jack.h index d90b9fa32707..c140fc7cbd3f 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -47,6 +47,9 @@ enum snd_jack_types { SND_JACK_BTN_0 = 0x4000, SND_JACK_BTN_1 = 0x2000, SND_JACK_BTN_2 = 0x1000, + SND_JACK_BTN_3 = 0x0800, + SND_JACK_BTN_4 = 0x0400, + SND_JACK_BTN_5 = 0x0200, }; struct snd_jack { @@ -55,7 +58,7 @@ struct snd_jack { int type; const char *id; char name[100]; - unsigned int key[3]; /* Keep in sync with definitions above */ + unsigned int key[6]; /* Keep in sync with definitions above */ void *private_data; void (*private_free)(struct snd_jack *); }; -- cgit v1.2.3 From f8f235e5bbf4e61f3e0886a44afb1dc4cfe8f337 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 27 Aug 2010 11:08:57 +0800 Subject: agp/intel: Fix cache control for Sandybridge Sandybridge GTT has new cache control bits in PTE, which controls graphics page cache in LLC or LLC/MLC, so we need to extend the mask function to respect the new bits. And set cache control to always LLC only by default on Gen6. Signed-off-by: Zhenyu Wang Cc: stable@kernel.org Signed-off-by: Chris Wilson --- drivers/char/agp/intel-agp.c | 1 + drivers/char/agp/intel-gtt.c | 50 ++++++++++++++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_gem.c | 1 + include/linux/intel-gtt.h | 20 +++++++++++++++++ 4 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 include/linux/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 710af89b176d..74461d177baf 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -12,6 +12,7 @@ #include #include "agp.h" #include "intel-agp.h" +#include #include "intel-gtt.c" diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 7f35854d33a3..64b10551a3f8 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -49,6 +49,26 @@ static struct gatt_mask intel_i810_masks[] = .type = INTEL_AGP_CACHED_MEMORY} }; +#define INTEL_AGP_UNCACHED_MEMORY 0 +#define INTEL_AGP_CACHED_MEMORY_LLC 1 +#define INTEL_AGP_CACHED_MEMORY_LLC_GFDT 2 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC 3 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT 4 + +static struct gatt_mask intel_gen6_masks[] = +{ + {.mask = I810_PTE_VALID | GEN6_PTE_UNCACHED, + .type = INTEL_AGP_UNCACHED_MEMORY }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_GFDT }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT }, +}; + static struct _intel_private { struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; @@ -178,13 +198,6 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, off_t pg_start, int mask_type) { int i, j; - u32 cache_bits = 0; - - if 
(agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) - { - cache_bits = GEN6_PTE_LLC_MLC; - } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, @@ -317,6 +330,23 @@ static int intel_i830_type_to_mask_type(struct agp_bridge_data *bridge, return 0; } +static int intel_gen6_type_to_mask_type(struct agp_bridge_data *bridge, + int type) +{ + unsigned int type_mask = type & ~AGP_USER_CACHED_MEMORY_GFDT; + unsigned int gfdt = type & AGP_USER_CACHED_MEMORY_GFDT; + + if (type_mask == AGP_USER_UNCACHED_MEMORY) + return INTEL_AGP_UNCACHED_MEMORY; + else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC_MLC; + else /* set 'normal'/'cached' to LLC by default */ + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC; +} + + static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, int type) { @@ -1163,7 +1193,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); - if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && + if (!IS_SNB && mask_type != 0 && mask_type != AGP_PHYS_MEMORY && mask_type != INTEL_AGP_CACHED_MEMORY) goto out_err; @@ -1563,7 +1593,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .mask_memory = intel_gen6_mask_memory, - .masks = intel_i810_masks, + .masks = intel_gen6_masks, .agp_enable = intel_i810_agp_enable, .cache_flush = global_cache_flush, .create_gatt_table = intel_i965_create_gatt_table, @@ -1576,7 +1606,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .agp_alloc_pages = agp_generic_alloc_pages, .agp_destroy_page = agp_generic_destroy_page, .agp_destroy_pages = agp_generic_destroy_pages, - .agp_type_to_mask_type = intel_i830_type_to_mask_type, + .agp_type_to_mask_type = intel_gen6_type_to_mask_type, .chipset_flush = intel_i915_chipset_flush, #ifdef USE_PCI_DMA_API .agp_map_page = intel_agp_map_page, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 748c26340c35..16fca1d1799a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,6 +34,7 @@ #include #include #include +#include static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj); static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h new file mode 100644 index 000000000000..1d19ab2afa39 --- /dev/null +++ b/include/linux/intel-gtt.h @@ -0,0 +1,20 @@ +/* + * Common Intel AGPGART and GTT definitions. + */ +#ifndef _INTEL_GTT_H +#define _INTEL_GTT_H + +#include + +/* This is for Intel only GTT controls. + * + * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only + */ + +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + +#endif -- cgit v1.2.3 From 2bf2160d8805de64308e2e7c3cd97813cb58ed2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 23 Aug 2010 18:42:48 +0900 Subject: irq: Add tracepoint to softirq_raise Add a tracepoint for tracing when softirq action is raised. 
This and the existing tracepoints complete softirq's tracepoints: softirq_raise, softirq_entry and softirq_exit. And when this tracepoint is used in combination with the softirq_entry tracepoint we can determine the softirq raise latency. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Acked-by: Neil Horman Cc: David Miller Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724298.4050509@jp.fujitsu.com> [ factorize softirq events with DECLARE_EVENT_CLASS ] Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 8 +++++++- include/trace/events/irq.h | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include -#include + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 3e4b10d7a4d2a78af64f8096dc7cdb3bebd65adb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Aug 2010 18:43:51 +0900 Subject: napi: Convert trace_napi_poll to TRACE_EVENT This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve the usability of napi_poll tracepoint. 
-0 [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3 -0 [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1 The original patch is below: http://marc.info/?l=linux-kernel&m=126021713809450&w=2 [ sanagi.koki@jp.fujitsu.com: And add a fix by Steven Rostedt: http://marc.info/?l=linux-kernel&m=126150506519173&w=2 ] Signed-off-by: Neil Horman Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C7242D7.4050009@jp.fujitsu.com> Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/trace/events/napi.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include #include +#include + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ -- cgit v1.2.3 From cf66ba58b5cb8b1526e9dd2fb96ff8db048d4d44 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:45:02 +0900 Subject: netdev: Add tracepoints to netdev layer This patch adds tracepoint to dev_queue_xmit, dev_hard_start_xmit, netif_rx and netif_receive_skb. These tracepoints help you to monitor network driver's input/output. -0 [001] 112447.902030: netif_rx: dev=eth1 skbaddr=f3ef0900 len=84 -0 [001] 112447.902039: netif_receive_skb: dev=eth1 skbaddr=f3ef0900 len=84 sshd-6828 [000] 112447.903257: net_dev_queue: dev=eth4 skbaddr=f3fca538 len=226 sshd-6828 [000] 112447.903260: net_dev_xmit: dev=eth4 skbaddr=f3fca538 len=226 rc=0 Signed-off-by: Koki Sanagi Acked-by: David S. 
Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C72431E.3000901@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/net.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 6 ++++ net/core/net-traces.c | 1 + 3 files changed, 89 insertions(+) create mode 100644 include/trace/events/net.h (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include +#include +#include +#include + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83c..5a4fbc7405e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -128,6 +128,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -1978,6 +1979,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -1998,6 +2000,7 @@ gso: skb_dst_drop(nskb); rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_xmit(nskb, rc); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2186,6 +2189,7 @@ int dev_queue_xmit(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif + trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; @@ -2512,6 +2516,7 @@ int netif_rx(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_rx(skb); #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -2828,6 +2833,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_receive_skb(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; diff --git a/net/core/net-traces.c 
b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -26,6 +26,7 @@ #define CREATE_TRACE_POINTS #include +#include #include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- cgit v1.2.3
From 07dc22e7295f25526f110d704655ff0ea7687420 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:46:12 +0900 Subject: skb: Add tracepoints to freeing skb
This patch adds a tracepoint to consume_skb and adds trace_kfree_skb before __kfree_skb in skb_free_datagram_locked and net_tx_action. Combined with the tracepoint on dev_hard_start_xmit, we can check how long it takes to free transmitted packets, and from that we can calculate how many packets the driver was holding at that time. This is useful when drops of transmitted packets are a problem; a standalone sketch of the pairing follows the diff. sshd-6828 [000] 112689.258154: consume_skb: skbaddr=f2d99bb8
Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724364.50903@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/skb.h | 17 +++++++++++++++++ net/core/datagram.c | 1 + net/core/dev.c | 2 ++ net/core/skbuff.c | 1 + 4 files changed, 21 insertions(+) (limited to 'include')
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len),
diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..282806ba7a57 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ + trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c index 5a4fbc7405e2..2308cce48048 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -2576,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..12e61e351d0e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb);
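To make the pairing concrete, here is a standalone user-space sketch of matching a net_dev_xmit event to the later consume_skb/kfree_skb event by skbaddr. This is plain C, not kernel code: the trace parsing is stubbed out, the xmit timestamp is invented, and only the consume_skb sample above comes from the commit message.

#include <stdio.h>

struct ev { unsigned long skbaddr; double t; };

int main(void)
{
	/* hand-made events standing in for parsed trace lines */
	struct ev xmit  = { 0xf2d99bb8, 112689.258101 };  /* invented */
	struct ev freed = { 0xf2d99bb8, 112689.258154 };  /* from the log above */

	/* pair xmit and free by skb address to get the free latency */
	if (xmit.skbaddr == freed.skbaddr)
		printf("skb %#lx freed %.0f us after xmit\n",
		       xmit.skbaddr, (freed.t - xmit.t) * 1e6);
	return 0;
}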
-- cgit v1.2.3
From 269cddd44e3588d1c50a7ec055b78de4d6c72cb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Sep 2010 14:17:10 -0500 Subject: dlm: Fix dlm lock status block comment in dlm.h
There is only one place in the dlm where the sb_status is set and that is queue_cast(). Tracing back the callers of that function shows that the listed set of return values is out of date, so here is an updated set.
Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- include/linux/dlm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include')
diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 0b3518c42356..d4e02f5353a0 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -48,10 +48,10 @@ typedef void dlm_lockspace_t; * * 0 if lock request was successful * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE - * -ENOMEM if there is no memory to process request - * -EINVAL if there are invalid parameters * -DLM_EUNLOCK if unlock request was successful * -DLM_ECANCEL if a cancel completed successfully + * -EDEADLK if a deadlock was detected + * -ETIMEDOUT if the lock request was canceled due to a timeout */ #define DLM_SBF_DEMOTED 0x01
-- cgit v1.2.3
From 17cebf658e088935d4bdebfc7ad9800e9fc4a0b2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: extract some common sunrpc_cache code from nfsd
Rather than duplicating this idiom twice, put it in an inline function. This reduces the usage of 'expiry_time' outside the sunrpc/cache.c code and thus the impact of a change that is about to be made to that field.
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 9 +++------ include/linux/sunrpc/cache.h | 6 ++++++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include')
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71d87dd..e56827b88fd2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -935,10 +935,9 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) @@ -973,10 +972,9 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } /* @@ -1097,8 +1095,7 @@ out: static void exp_do_unexport(svc_export *unexp) { - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); + sunrpc_invalidate(&unexp->h, &svc_export_cache); exp_unhash(unexp); exp_fsid_unhash(unexp); }
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84b92f4..0e1febf4e5bc 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -228,4 +228,10 @@ static inline time_t get_expiry(char **bpp) return rv; } +static inline void sunrpc_invalidate(struct cache_head *h, + struct cache_detail *detail) +{ + h->expiry_time = get_seconds() - 1; + detail->nextcheck = get_seconds(); +} #endif /* _LINUX_SUNRPC_CACHE_H_ */
-- cgit v1.2.3
From c5b29f885afe890f953f7f23424045cdad31d3e4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: use seconds since boot in expiry cache
This protects us from confusion when the wallclock time changes. We convert to and from wallclock when setting or reading expiry times. Also use seconds since boot for the last_close time.
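The arithmetic is easy to check outside the kernel. A minimal standalone sketch follows; the fake clock variables and the one-hour clock step are invented for the demo, and only the helper names come from the patch below. It relies on getboottime() being derived as wall time minus uptime, so a wallclock step moves the apparent boot time by the same amount.

#include <stdio.h>
#include <time.h>

static time_t wallclock = 1000000;  /* stand-in for get_seconds() */
static time_t boot_wall = 999000;   /* stand-in for getboottime() */

static time_t seconds_since_boot(void) { return wallclock - boot_wall; }
static time_t convert_to_wallclock(time_t sinceboot) { return boot_wall + sinceboot; }

int main(void)
{
	time_t expiry = seconds_since_boot() + 30;  /* entry valid for 30s */

	/* an admin steps the clock back an hour; boot time moves with it */
	wallclock -= 3600;
	boot_wall -= 3600;

	/* the boot-relative comparison is unaffected by the step */
	printf("expired: %s\n",
	       seconds_since_boot() > expiry ? "yes" : "no");  /* "no" */
	printf("expiry shown to userspace: %ld\n",
	       (long)convert_to_wallclock(expiry));
	return 0;
}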
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 6 +++--- fs/nfsd/nfs4idmap.c | 2 +- include/linux/sunrpc/cache.h | 28 +++++++++++++++++++++++++--- net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- 4 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625db..a6e711ad130f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf493424..808b33a4a090 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -550,7 +550,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || detail->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 0e1febf4e5bc..ece432b7f87f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -218,20 +218,42 @@ static inline int get_int(char **bpp, int *anint) return 0; } +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. 
+ */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + static inline time_t get_expiry(char **bpp) { int rv; + struct timespec boot; + if (get_int(bpp, &rv)) return 0; if (rv < 0) return 0; - return rv; + getboottime(&boot); + return rv - boot.tv_sec; } static inline void sunrpc_invalidate(struct cache_head *h, struct cache_detail *detail) { - h->expiry_time = get_seconds() - 1; - detail->nextcheck = get_seconds(); + h->expiry_time = seconds_since_boot() - 1; + detail->nextcheck = seconds_since_boot(); } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2b06410e584e..8dc121955fdc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { - time_t now = get_seconds(); + time_t now = seconds_since_boot(); h->next = NULL; h->flags = 0; kref_init(&h->ref); @@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < get_seconds()) || + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time > h->last_refresh); } @@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; - head->last_refresh = get_seconds(); + head->last_refresh = seconds_since_boot(); set_bit(CACHE_VALID, &head->flags); } @@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail, /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); - age = get_seconds() - h->last_refresh; + age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) @@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail, cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); cache_fresh_unlocked(h, detail); rv = -ENOENT; } @@ -388,11 +388,11 @@ static int cache_clean(void) return -1; } current_detail = list_entry(next, struct cache_detail, others); - if (current_detail->nextcheck > get_seconds()) + if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; - current_detail->nextcheck = get_seconds()+30*60; + current_detail->nextcheck = seconds_since_boot()+30*60; } } @@ -477,7 +477,7 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { detail->flush_time = LONG_MAX; - detail->nextcheck = get_seconds(); + detail->nextcheck = seconds_since_boot(); cache_flush(); detail->flush_time = 1; } @@ -902,7 +902,7 @@ static int cache_release(struct inode *inode, struct file *filp, filp->private_data = NULL; kfree(rp); - cd->last_close = get_seconds(); + cd->last_close = seconds_since_boot(); atomic_dec(&cd->readers); } module_put(cd->owner); @@ -1034,7 +1034,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, int len; if (atomic_read(&detail->readers) == 0 && - detail->last_close < get_seconds() - 30) { + detail->last_close < seconds_since_boot() - 30) { 
warn_no_listener(detail); return -EINVAL; } @@ -1219,7 +1219,8 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); + convert_to_wallclock(cp->expiry_time), + atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ @@ -1285,7 +1286,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", cd->flush_time); + sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; @@ -1303,19 +1304,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *ep; - long flushtime; + char *bp, *ep; + if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; - flushtime = simple_strtoul(tbuf, &ep, 0); + simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; - cd->flush_time = flushtime; - cd->nextcheck = get_seconds(); + bp = tbuf; + cd->flush_time = get_expiry(&bp); + cd->nextcheck = seconds_since_boot(); cache_flush(); *ppos += count;
-- cgit v1.2.3
From f16b6e8d838b2e2bb4561201311c66ac02ad67df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:06 +1000 Subject: sunrpc/cache: allow threads to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the whole request to be retried has (at least) two problems. 1/ With NFSv4, requests can be quite complex and re-trying a whole request when a later part fails should only be a last resort, not a normal practice. 2/ Large requests, and in particular any 'write' request, will not be queued by the current code and doing so would be undesirable. In many cases only a very short wait is needed before the cache gets valid data. So, providing the underlying transport permits it by setting ->thread_wait, arrange to wait briefly for an upcall to be completed (as reflected in the clearing of CACHE_PENDING). If the short wait was not long enough and CACHE_PENDING is still set, fall back on the old approach. The 'thread_wait' value is set to 5 seconds when there are spare threads, and 1 second when there are no spare threads. These values are probably much higher than needed, but will ensure some forward progress. Note that as we only request an update for a non-valid item, and as non-valid items are updated in place, it is extremely unlikely that cache_check will return -ETIMEDOUT. Normally cache_defer_req will sleep for a short while and then find that the item is_valid.
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 3 +++ net/sunrpc/cache.c | 59 +++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/svc_xprt.c | 11 +++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include')
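In outline, the fast path this adds to cache_defer_req() works like the following simplified schematic (locking and hash-list cleanup are omitted; see the hunk below for the real code):

if (req->thread_wait) {
	/* sleep on an on-stack completion instead of queueing the request;
	 * cache_restart_thread() completes it when the upcall finishes */
	sleeper.handle.revisit = cache_restart_thread;
	wait_for_completion_interruptible_timeout(&sleeper.completion,
						  req->thread_wait);
	if (!test_bit(CACHE_PENDING, &item->flags))
		return -EEXIST;		/* answered while we slept */
	dreq = req->defer(req);		/* still pending: defer as before */
	goto retry;
}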
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ece432b7f87f..52a7d7224e90 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,6 +125,9 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8dc121955fdc..2c5297f245b4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); + struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { /* too much in the cache, randomly drop this one, @@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) if (net_random()&1) return -ENOMEM; } - dreq = req->defer(req); + if (req->thread_wait) { + dreq = &sleeper.handle; + sleeper.completion = + COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + } else + dreq = req->defer(req); + + retry: if (dreq == NULL) return -ENOMEM; @@ -555,6 +575,43 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + + if (dreq == &sleeper.handle) { + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + list_del_init(&sleeper.handle.recent); + list_del_init(&sleeper.handle.hash); + cache_defer_cnt--; + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + dreq = req->defer(req); + goto retry; + } + /* only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; + } return 0; }
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc084939dd8..8ff6840866fa 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queued, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending.
Go to sleep */ svc_thread_enqueue(pool, rqstp);
-- cgit v1.2.3
From 4f8b02b4e5c6896e073bed736136d420bd44b627 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP
These functions are used only by the percpu memory allocator on SMP. Don't build them on UP.
Signed-off-by: Tejun Heo Cc: Nick Piggin Reviewed-by: Christoph Lameter --- include/linux/vmalloc.h | 2 ++ mm/vmalloc.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include')
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118dc..63a4fe6d51bd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a6..c623e0ce3f00 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2056,6 +2056,7 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +#ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; @@ -2336,6 +2337,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) free_vm_area(vms[i]); kfree(vms); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos)
-- cgit v1.2.3
From 6abad5acac09921f4944af77d3860f82d49f528d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
In preparation for enabling the percpu allocator on UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and the chunk size can be smaller, which is important as the UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size, but 32k should still be enough.
Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c6..fc8130a7cac0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is
-- cgit v1.2.3
From bbddff0545878a8649c091a9dd7c43ce91516734 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: percpu: use percpu allocator on UP too
On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For a certain number of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), the percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * The percpu allocator honors alignment up to PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues (see the sketch below).
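A short illustrative fragment of that alignment point (this is a sketch, not code from the patch; struct pad is a stand-in for any structure wanting more than SMP_CACHE_BYTES alignment):

struct pad { char buf[256]; } __percpu *p;

/* honored by the real percpu allocator up to PAGE_SIZE; the old
 * kmalloc-backed UP path could only WARN on such a request */
p = __alloc_percpu(sizeof(struct pad), PAGE_SIZE);
if (p)
	free_percpu(p);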
Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than a small amount of memory, there isn't much to lose by enabling the percpu allocator on UP. It can simply use the kernel-memory-based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes the UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements a generic setup_per_cpu_areas() for UP.
Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Pekka Enberg --- include/linux/percpu.h | 29 +++++------------------- mm/Kconfig | 8 +++++++ mm/Makefile | 7 +----- mm/percpu-km.c | 2 +- mm/percpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- mm/percpu_up.c | 30 ------------------------- 6 files changed, 71 insertions(+), 65 deletions(-) delete mode 100644 mm/percpu_up.c (limited to 'include')
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac0..aeeeef1093cd 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
diff --git a/mm/Kconfig b/mm/Kconfig index f4e516e9c37c..01a57447a410 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information.
+ +# +# UP and nommu archs use km based percpu allocator +# +config NEED_PER_CPU_KM + depends on !SMP + bool + default y diff --git a/mm/Makefile b/mm/Makefile index 34b2546a9e37..f73f75a29f82 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o \ + page_isolation.o mm_init.o mmu_context.o percpu.o \ $(mmu-y) obj-y += init-mm.o @@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_SMP -obj-y += percpu.o -else -obj-y += percpu_up.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/percpu-km.c b/mm/percpu-km.c index df680855540a..7037bc73bfa4 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -27,7 +27,7 @@ * chunk size is not aligned. percpu-km code will whine about it. */ -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) #error "contiguous percpu allocation is incompatible with paged first chunk" #endif diff --git a/mm/percpu.c b/mm/percpu.c index 58c572b18b07..fa70122dfdd0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,7 @@ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ +#ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ @@ -89,6 +90,11 @@ (unsigned long)pcpu_base_addr - \ (unsigned long)__per_cpu_start) #endif +#else /* CONFIG_SMP */ +/* on UP, it's always identity mapped */ +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) +#endif /* CONFIG_SMP */ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ @@ -949,6 +955,7 @@ EXPORT_SYMBOL_GPL(free_percpu); */ bool is_kernel_percpu_address(unsigned long addr) { +#ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); unsigned int cpu; @@ -959,6 +966,8 @@ bool is_kernel_percpu_address(unsigned long addr) if ((void *)addr >= start && (void *)addr < start + static_size) return true; } +#endif + /* on UP, can't distinguish from other static vars, always false */ return false; } @@ -1066,6 +1075,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) free_bootmem(__pa(ai), ai->__ai_size); } +#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)) /** * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes @@ -1220,6 +1231,8 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( return ai; } +#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */ /** * pcpu_dump_alloc_info - print out information about pcpu_alloc_info @@ -1363,7 +1376,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* sanity checks */ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); +#ifdef CONFIG_SMP 
PCPU_SETUP_BUG_ON(!ai->static_size); +#endif PCPU_SETUP_BUG_ON(!base_addr); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); @@ -1488,6 +1503,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, return 0; } +#ifdef CONFIG_SMP + const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", @@ -1758,8 +1775,9 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA /* - * Generic percpu area setup. + * Generic SMP percpu area setup. * * The embedding helper is used because its behavior closely resembles * the original non-dynamic generic percpu area setup. This is @@ -1770,7 +1788,6 @@ out_free_ar: * on the physical linear memory mapping which uses large page * mappings on applicable archs. */ -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); @@ -1799,13 +1816,48 @@ void __init setup_per_cpu_areas(void) PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) - panic("Failed to initialized percpu areas."); + panic("Failed to initialize percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ + +#else /* CONFIG_SMP */ + +/* + * UP percpu area setup. + * + * UP always uses km-based percpu allocator with identity mapping. + * Static percpu variables are indistinguishable from the usual static + * variables and don't require any special preparation. + */ +void __init setup_per_cpu_areas(void) +{ + const size_t unit_size = + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, + PERCPU_DYNAMIC_RESERVE)); + struct pcpu_alloc_info *ai; + void *fc; + + ai = pcpu_alloc_alloc_info(1, 1); + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!ai || !fc) + panic("Failed to allocate memory for percpu areas."); + + ai->dyn_size = unit_size; + ai->unit_size = unit_size; + ai->atom_size = unit_size; + ai->alloc_size = unit_size; + ai->groups[0].nr_units = 1; + ai->groups[0].cpu_map[0] = 0; + + if (pcpu_setup_first_chunk(ai, fc) < 0) + panic("Failed to initialize percpu areas."); +} + +#endif /* CONFIG_SMP */ /* * First and reserved chunks are initialized with temporary allocation diff --git a/mm/percpu_up.c b/mm/percpu_up.c deleted file mode 100644 index db884fae5721..000000000000 --- a/mm/percpu_up.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP - */ - -#include -#include -#include - -void __percpu *__alloc_percpu(size_t size, size_t align) -{ - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. 
- */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -void free_percpu(void __percpu *p) -{ - kfree(this_cpu_ptr(p)); -} -EXPORT_SYMBOL_GPL(free_percpu); - -phys_addr_t per_cpu_ptr_to_phys(void *addr) -{ - return __pa(addr); -}
-- cgit v1.2.3
From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:46:37 +0200 Subject: semaphore: Add DEFINE_SEMAPHORE
The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been done. Some of the users are real semaphores and we should name them as such instead of confusing everyone with "MUTEX". Provide the infrastructure to finally get rid of init_MUTEX[_LOCKED] and DECLARE_MUTEX.
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125054.795929962@linutronix.de> --- include/linux/semaphore.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include')
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839ac890..5310d27abd2a 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
-- cgit v1.2.3
From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Sep 2010 15:52:06 +0800 Subject: spi/dw_spi: clean the cs_control code
commit 052dc7c45i "spi/dw_spi: conditional transfer mode change" introduced the cs_control code, which has a bug: it uses the SPI mode bit offset, rather than the transfer mode offset, when setting the transfer mode in the control register. It also forces devices that don't need cs_control to reconfigure the control registers for each SPI transfer. A standalone sketch of the mask arithmetic follows.
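The mask arithmetic is easy to see in isolation. A standalone sketch using the register layout from the dw_spi.h hunk below (the initial cr0 value is invented for the demo):

#include <stdio.h>

#define SPI_MODE_OFFSET 6                       /* SCPH/SCOL live at bits 7:6 */
#define SPI_TMOD_OFFSET 8                       /* transfer mode lives at bits 9:8 */
#define SPI_TMOD_MASK   (0x3 << SPI_TMOD_OFFSET)
#define SPI_TMOD_RO     0x2                     /* recv only */

int main(void)
{
	unsigned int cr0 = 0x1c0;  /* SCPH+SCOL set, TMOD = xmit only */

	/* old code: wipes the clock phase/polarity bits (7:6) by mistake
	 * and leaves a stale TMOD bit behind */
	unsigned int buggy = (cr0 & ~(0x3 << SPI_MODE_OFFSET))
			   | (SPI_TMOD_RO << SPI_TMOD_OFFSET);
	/* fixed code: clears exactly the TMOD field (9:8) */
	unsigned int fixed = (cr0 & ~SPI_TMOD_MASK)
			   | (SPI_TMOD_RO << SPI_TMOD_OFFSET);

	printf("buggy cr0 = %#x, fixed cr0 = %#x\n", buggy, fixed);
	return 0;
}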
This patch fixes both issues.
Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 17 +++++------------ include/linux/spi/dw_spi.h | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include')
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 11fbbf6fb07b..56247853c298 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static void null_cs_control(u32 command) -{ -} - static int null_writer(struct dw_spi *dws) { u8 n_bytes = dws->n_bytes; @@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws) struct spi_transfer, transfer_list); - if (!last_transfer->cs_change) + if (!last_transfer->cs_change && dws->cs_control) dws->cs_control(MRST_SPI_DEASSERT); msg->state = NULL; @@ -549,13 +545,13 @@ static void pump_transfers(unsigned long data) */ if (dws->cs_control) { if (dws->rx && dws->tx) - chip->tmode = 0x00; + chip->tmode = SPI_TMOD_TR; else if (dws->rx) - chip->tmode = 0x02; + chip->tmode = SPI_TMOD_RO; else - chip->tmode = 0x01; + chip->tmode = SPI_TMOD_TO; - cr0 &= ~(0x3 << SPI_MODE_OFFSET); + cr0 &= ~SPI_TMOD_MASK; cr0 |= (chip->tmode << SPI_TMOD_OFFSET); } @@ -704,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi) chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); if (!chip) return -ENOMEM; - - chip->cs_control = null_cs_control; - chip->enable_dma = 0; } /*
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f95a2f2..c91302f3a257 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define SPI_TMOD_RO 0x2 /* recv only */
-- cgit v1.2.3
From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 5 Sep 2010 18:02:29 +0000 Subject: ipvs: fix active FTP
- Do not create an expectation when forwarding the PORT command, to avoid blocking the connection. The problem is that nf_conntrack_ftp.c:help() tries to create the same expectation later in POST_ROUTING and drops the packet with a "dropping packet" message after failure in nf_ct_expect_related. - Change ip_vs_update_conntrack to alter the conntrack for related connections from the real server. If we do not alter the reply in this direction, the next packet from the client to vport 20 comes in as a NEW connection. We alter it, but a collision may then occur between the two conntracks and the second conntrack can get destroyed immediately, leaving that connection stuck as well.
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S.
Miller --- include/net/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ftp.c | 6 ------ net/netfilter/ipvs/ip_vs_xmit.c | 18 ++++++++++++------ 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0f7303..f976885f686f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba48011..4c2f89df5cce 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33b329bfc2d2..7e9af5b76d9e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -410,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -497,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d3..49df6bea6a2d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. 
*/ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still -- cgit v1.2.3 From 0ade638655f0ef4d807295c14a4c97544bd6b9ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 24 Aug 2010 22:18:41 +0200 Subject: intel-gtt: introduce drm/intel-gtt.h Add a few definitions to it that are already shared and that will be shared in the future (like the number of stolen entries). No functional changes in here. Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 42 +++++++++++++++++++---------------------- drivers/gpu/drm/i915/i915_dma.c | 2 -- drivers/gpu/drm/i915/i915_drv.h | 1 + include/drm/intel-gtt.h | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+), 25 deletions(-) create mode 100644 include/drm/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 72f937615056..0a3e91ba0f2b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -25,6 +25,7 @@ #include "agp.h" #include "intel-agp.h" #include +#include /* * If we have Intel graphics, we're not going to have anything other than @@ -81,17 +82,11 @@ static struct gatt_mask intel_gen6_masks[] = }; static struct _intel_private { + struct intel_gtt base; struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; u32 __iomem *gtt; /* I915G */ int num_dcache_entries; - /* gtt_entries is the number of gtt entries that are already mapped - * to stolen memory. Stolen memory is larger than the memory mapped - * through gtt_entries, as it includes some reserved space for the BIOS - * popup and for the GTT. - */ - int gtt_entries; /* i830+ */ - int gtt_total_size; union { void __iomem *i9xx_flush_page; void *i8xx_flush_page; @@ -772,7 +767,7 @@ static void intel_i830_init_gtt_entries(void) gtt_entries = 0; } - intel_private.gtt_entries = gtt_entries; + intel_private.base.gtt_stolen_entries = gtt_entries; } static void intel_i830_fini_flush(void) @@ -849,7 +844,7 @@ static int intel_i830_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.registers); return -ENOMEM; } @@ -919,7 +914,7 @@ static int intel_i830_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. 
*/ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < current_size->num_entries; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < current_size->num_entries; i++) { writel(agp_bridge->scratch_page, intel_private.registers+I810_PTE_BASE+(i*4)); } readl(intel_private.registers+I810_PTE_BASE+((i-1)*4)); /* PCI Posting. */ @@ -950,10 +945,10 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1001,7 +996,7 @@ static int intel_i830_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1136,7 +1131,8 @@ static int intel_i9xx_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. */ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < intel_private.gtt_total_size; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < + intel_private.base.gtt_total_entries; i++) { writel(agp_bridge->scratch_page, intel_private.gtt+i); } readl(intel_private.gtt+i-1); /* PCI Posting. 
*/ @@ -1181,10 +1177,10 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1227,7 +1223,7 @@ static int intel_i915_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1323,7 +1319,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_map_size / 4; + intel_private.base.gtt_total_entries = gtt_map_size / 4; temp &= 0xfff80000; @@ -1338,7 +1334,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; @@ -1449,7 +1445,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_size / 4; + intel_private.base.gtt_total_entries = gtt_size / 4; intel_private.registers = ioremap(temp, 128 * 4096); if (!intel_private.registers) { @@ -1462,7 +1458,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8d52f01a6d90..47228cb16901 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -42,8 +42,6 @@ #include #include -extern int intel_max_stolen; /* from AGP driver */ - /** * Sets up the hardware status page for devices that need a physical address * in the register. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cfc8bfd0fd7e..d825ef207b2c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -34,6 +34,7 @@ #include "intel_bios.h" #include "intel_ringbuffer.h" #include +#include /* General customization: */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h new file mode 100644 index 000000000000..6769cb704e9b --- /dev/null +++ b/include/drm/intel-gtt.h @@ -0,0 +1,18 @@ +/* Common header for intel-gtt.ko and i915.ko */ + +#ifndef _DRM_INTEL_GTT_H +#define _DRM_INTEL_GTT_H +extern int intel_max_stolen; /* from AGP driver */ + +struct intel_gtt { + /* Number of stolen gtt entries at the beginning. */ + unsigned int gtt_stolen_entries; + /* Total number of gtt entries. 
*/ + unsigned int gtt_total_entries; + /* Part of the gtt that is mappable by the cpu, for those chips where + * this is not the full gtt. */ + unsigned int gtt_mappable_entries; +}; + +#endif + -- cgit v1.2.3 From 19966754328d99ee003ddfc7a8c31ceb115483ac Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Sep 2010 20:08:44 +0200 Subject: drm/i915: die, i915_probe_agp, die Use the detection from intel-gtt.ko instead. Hooray! Also move the stolen mem allocator to the other gtt stuff in dev_prv->mem. v2: Chris Wilson noted that my error handling was crap. Fix it. He also said that this fixes a problem on his i845. Indeed, i915_probe_agp misses a special case for i830/i845 stolen mem detection. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=25476 Cc: stable@kernel.org Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 6 ++ drivers/gpu/drm/i915/i915_dma.c | 190 +++------------------------------------- drivers/gpu/drm/i915/i915_drv.h | 7 +- include/drm/intel-gtt.h | 2 + 4 files changed, 25 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 6eb64c19af0e..9cb7c98afb9c 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1714,6 +1714,12 @@ int intel_gmch_probe(struct pci_dev *pdev, } EXPORT_SYMBOL(intel_gmch_probe); +struct intel_gtt *intel_gtt_get(void) +{ + return &intel_private.base; +} +EXPORT_SYMBOL(intel_gtt_get); + void intel_gmch_remove(struct pci_dev *pdev) { if (intel_private.pcidev) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a693b27f3df4..428c75b466aa 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,172 +989,6 @@ intel_teardown_mchbar(struct drm_device *dev) release_resource(&dev_priv->mch_res); } -/** - * i915_probe_agp - get AGP bootup configuration - * @pdev: PCI device - * @aperture_size: returns AGP aperture configured size - * @preallocated_size: returns size of BIOS preallocated AGP space - * - * Since Intel integrated graphics are UMA, the BIOS has to set aside - * some RAM for the framebuffer at early boot. This code figures out - * how much was set aside so we can use it for our own purposes. - */ -static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, - uint32_t *preallocated_size) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u16 tmp = 0; - unsigned long overhead; - unsigned long stolen; - - /* Get the fb aperture size and "stolen" memory amount. */ - pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &tmp); - - *aperture_size = 1024 * 1024; - *preallocated_size = 1024 * 1024; - - switch (dev->pdev->device) { - case PCI_DEVICE_ID_INTEL_82830_CGC: - case PCI_DEVICE_ID_INTEL_82845G_IG: - case PCI_DEVICE_ID_INTEL_82855GM_IG: - case PCI_DEVICE_ID_INTEL_82865_IG: - if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M) - *aperture_size *= 64; - else - *aperture_size *= 128; - break; - default: - /* 9xx supports large sizes, just look at the length */ - *aperture_size = pci_resource_len(dev->pdev, 2); - break; - } - - /* - * Some of the preallocated space is taken by the GTT - * and popup. GTT is 1K per MB of aperture size, and popup is 4K. 
- */ - if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) - overhead = 4096; - else - overhead = (*aperture_size / 1024) + 4096; - - if (IS_GEN6(dev)) { - /* SNB has memory control reg at 0x50.w */ - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &tmp); - - switch (tmp & SNB_GMCH_GMS_STOLEN_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case SNB_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_192M: - stolen = 192 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_288M: - stolen = 288 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_320M: - stolen = 320 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_384M: - stolen = 384 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_416M: - stolen = 416 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_448M: - stolen = 448 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_480M: - stolen = 480 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_512M: - stolen = 512 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & SNB_GMCH_GMS_STOLEN_MASK); - return -1; - } - } else { - switch (tmp & INTEL_GMCH_GMS_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case INTEL_855_GMCH_GMS_STOLEN_1M: - stolen = 1 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_4M: - stolen = 4 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_8M: - stolen = 8 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_16M: - stolen = 16 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_48M: - stolen = 48 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & INTEL_GMCH_GMS_MASK); - return -1; - } - } - - *preallocated_size = stolen - overhead; - - return 0; -} - #define PTE_ADDRESS_MASK 0xfffff000 #define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */ #define PTE_MAPPING_TYPE_UNCACHED (0 << 1) @@ -1249,7 +1083,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) unsigned long ll_base = 0; /* Leave 1M for line length buffer & misc. 
*/ - compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0); + compressed_fb = drm_mm_search_free(&dev_priv->mm.vram, size, 4096, 0); if (!compressed_fb) { dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL; i915_warn_stolen(dev); @@ -1270,7 +1104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) } if (!(IS_GM45(dev) || IS_IRONLAKE_M(dev))) { - compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096, + compressed_llb = drm_mm_search_free(&dev_priv->mm.vram, 4096, 4096, 0); if (!compressed_llb) { i915_warn_stolen(dev); @@ -1366,8 +1200,8 @@ static int i915_load_modeset_init(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - /* Basic memrange allocator for stolen space (aka vram) */ - drm_mm_init(&dev_priv->vram, 0, prealloc_size); + /* Basic memrange allocator for stolen space (aka mm.vram) */ + drm_mm_init(&dev_priv->mm.vram, 0, prealloc_size); DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); /* We're off and running w/KMS */ @@ -2107,16 +1941,16 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); - if (ret) + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + ret = -ENODEV; goto out_iomapfree; - - if (prealloc_size > intel_max_stolen) { - DRM_INFO("detected %dM stolen memory, trimming to %dM\n", - prealloc_size >> 20, intel_max_stolen >> 20); - prealloc_size = intel_max_stolen; } + prealloc_size = dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT; + agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + dev_priv->wq = create_singlethread_workqueue("i915"); if (dev_priv->wq == NULL) { DRM_ERROR("Failed to create our workqueue.\n"); @@ -2301,7 +2135,7 @@ int i915_driver_unload(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); if (I915_HAS_FBC(dev) && i915_powersave) i915_cleanup_compression(dev); - drm_mm_takedown(&dev_priv->vram); + drm_mm_takedown(&dev_priv->mm.vram); intel_cleanup_overlay(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d825ef207b2c..c8b22005ec18 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,8 +305,6 @@ typedef struct drm_i915_private { uint32_t last_instdone; uint32_t last_instdone1; - struct drm_mm vram; - unsigned long cfb_size; unsigned long cfb_pitch; int cfb_fence; @@ -511,6 +509,11 @@ typedef struct drm_i915_private { u32 saveMCHBAR_RENDER_STANDBY; struct { + /** Bridge to intel-gtt-ko */ + struct intel_gtt *gtt; + /** Memory allocator for GTT stolen memory */ + struct drm_mm vram; + /** Memory allocator for GTT */ struct drm_mm gtt_space; struct io_mapping *gtt_mapping; diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 6769cb704e9b..b3aa7ab72d09 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -14,5 +14,7 @@ struct intel_gtt { unsigned int gtt_mappable_entries; }; +struct intel_gtt *intel_gtt_get(void); + #endif -- cgit v1.2.3 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659b..42ce6fe7a2d5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3 From a6e0fc8514d41dfdd98b1d15cacc432cf040f8af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 14:15:32 -0700 Subject: net: introduce rcu_dereference_rtnl We use rcu_dereference_check(p, rcu_read_lock_held() || lockdep_rtnl_is_held()) several times in network stack. More usages to come too, so its time to create a helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 58d44491880f..263690d991a8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -749,6 +749,17 @@ extern int rtnl_is_locked(void); extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +/** + * rcu_dereference_rtnl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL + */ +#define rcu_dereference_rtnl(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_rtnl_is_held()) + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3 From 15133f6e67d8d646d0744336b4daa3135452cb0d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 12 Jan 2010 14:33:38 -0800 Subject: RDS: Implement atomic operations Implement a CMSG-based interface to do FADD and CSWP ops. Alter send routines to handle atomic ops. Add atomic counters to stats. 
Add xmit_atomic() to struct rds_transport Inline rds_ib_send_unmap_rdma into unmap_rm Signed-off-by: Andy Grover --- include/linux/rds.h | 19 +++++++ net/rds/ib.c | 1 + net/rds/ib.h | 1 + net/rds/ib_rdma.c | 4 +- net/rds/ib_send.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++------ net/rds/rdma.c | 73 +++++++++++++++++++++++++++ net/rds/rds.h | 33 +++++++++++-- net/rds/send.c | 71 ++++++++++++++++++++++++-- net/rds/stats.c | 2 + 9 files changed, 321 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 7f3971d9fc5c..9239152abf7a 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -73,6 +73,8 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -237,6 +239,23 @@ struct rds_rdma_args { u_int64_t user_token; }; +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + }; + uint64_t flags; + uint64_t user_token; +}; + struct rds_rdma_notify { u_int64_t user_token; int32_t status; diff --git a/net/rds/ib.c b/net/rds/ib.c index 8f2d6dd7700a..f0d29656baff 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = { .xmit = rds_ib_xmit, .xmit_cong_map = NULL, .xmit_rdma = rds_ib_xmit_rdma, + .xmit_atomic = rds_ib_xmit_atomic, .recv = rds_ib_recv, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, diff --git a/net/rds/ib.h b/net/rds/ib.h index 64df4e79b29f..d2fd0aa4fde7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, u32 *adv_credits, int need_posted, int max_posted); +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0f3b5a2f3fe0..242231f09464 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE), + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_ATOMIC), + &pool->fmr_attr); if (IS_ERR(ibmr->fmr)) { err = PTR_ERR(ibmr->fmr); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f0edfdb2866c..b2bd164434ad 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm, rds_rdma_send_complete(rm, notify_status); } -static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, - struct rds_rdma_op *op) +static void rds_ib_send_atomic_complete(struct rds_message *rm, + int wc_status) { - if (op->r_mapped) { - ib_dma_unmap_sg(ic->i_cm_id->device, - op->r_sg, op->r_nents, - op->r_write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); - op->r_mapped = 0; - } + int notify_status; + + if (wc_status != IB_WC_SUCCESS) + notify_status = RDS_RDMA_OTHER_ERROR; + else + notify_status = RDS_RDMA_SUCCESS; + + rds_atomic_send_complete(rm, notify_status); } static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, @@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, DMA_TO_DEVICE); if (rm->rdma.m_rdma_op.r_active) { - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + struct rds_rdma_op *op = &rm->rdma.m_rdma_op; + + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes); } + if (rm->atomic.op_active) { + struct rm_atomic_op *op = &rm->atomic; + + /* unmap atomic recvbuf */ + if (op->op_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1, + DMA_FROM_DEVICE); + op->op_mapped = 0; + } + + rds_ib_send_atomic_complete(rm, wc_status); + + if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP) + rds_stats_inc(s_atomic_cswp); + else + rds_stats_inc(s_atomic_fadd); + } + /* If anyone waited for this message to get flushed out, wake * them up now */ rds_message_unmapped(rm); @@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic) u32 i; for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { - if (send->s_wr.opcode == 0xdead) + if (!send->s_rm || send->s_wr.opcode == 0xdead) continue; - if (send->s_rm) - rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); - if (send->s_op) - rds_ib_send_unmap_rdma(ic, send->s_op); + rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); } } @@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_ATOMIC_CMP_AND_SWP: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; @@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_send_get_message(conn, send->s_op); if (rm) { - if (rm->rdma.m_rdma_op.r_active) - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + rds_ib_send_unmap_rm(ic, send, wc.status); rds_ib_send_rdma_complete(rm, wc.status); rds_message_put(rm); } @@ -736,6 +761,89 @@ out: return ret; } +/* + * Issue atomic operation. + * A simplified version of the rdma case, we always map 1 SG, and + * only 8 bytes, for the return value from the atomic operation. 
+ */ +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_send_work *send = NULL; + struct ib_send_wr *failed_wr; + struct rds_ib_device *rds_ibdev; + u32 pos; + u32 work_alloc; + int ret; + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); + if (work_alloc != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + /* address of send request in ring */ + send = &ic->i_sends[pos]; + send->s_queued = jiffies; + + if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { + send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_compare; + send->s_wr.wr.atomic.swap = op->op_swap_add; + } else { /* FADD */ + send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.wr.atomic.swap = 0; + } + send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_wr.num_sge = 1; + send->s_wr.next = NULL; + send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; + send->s_wr.wr.atomic.rkey = op->op_rkey; + + /* map 8 byte retval buffer to the device */ + ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); + rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); + if (ret != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + /* Convert our struct scatterlist to struct ib_sge */ + send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].lkey = ic->i_mr->lkey; + + rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, + send->s_sge[0].addr, send->s_sge[0].length); + + failed_wr = &send->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, + send, &send->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + + if (unlikely(failed_wr != &send->s_wr)) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); + BUG_ON(failed_wr != &send->s_wr); + } + +out: + return ret; +} + int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) { struct rds_ib_connection *ic = conn->c_transport_data; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4fda33045598..a7019df38c70 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr); } + +/* + * Fill in rds_message for an atomic request. 
+ */ +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + struct page *page = NULL; + struct rds_atomic_args *args; + int ret = 0; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args)) + || rm->atomic.op_active) + return -EINVAL; + + args = CMSG_DATA(cmsg); + + if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_swap_add = args->cswp.swap; + rm->atomic.op_compare = args->cswp.compare; + } else { + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_swap_add = args->fadd.add; + } + + rm->m_rdma_cookie = args->cookie; + rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_recverr = rs->rs_recverr; + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + + /* verify 8 byte-aligned */ + if (args->local_addr & 0x7) { + ret = -EFAULT; + goto err; + } + + ret = rds_pin_pages(args->local_addr, 1, &page, 1); + if (ret != 1) + goto err; + ret = 0; + + sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr)); + + if (rm->atomic.op_notify || rm->atomic.op_recverr) { + /* We allocate an uninitialized notifier here, because + * we don't want to do that in the completion handler. We + * would have to use GFP_ATOMIC there, and don't want to deal + * with failed allocations. + */ + rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL); + if (!rm->atomic.op_notifier) { + ret = -ENOMEM; + goto err; + } + + rm->atomic.op_notifier->n_user_token = args->user_token; + rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS; + } + + rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie); + rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie); + + rm->atomic.op_active = 1; + + return ret; +err: + if (page) + put_page(page); + kfree(rm->atomic.op_notifier); + + return ret; +} diff --git a/net/rds/rds.h b/net/rds/rds.h index 0bb4957e0cfc..830e2bbb3332 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -97,6 +97,7 @@ struct rds_connection { unsigned int c_xmit_hdr_off; unsigned int c_xmit_data_off; unsigned int c_xmit_rdma_sent; + unsigned int c_xmit_atomic_sent; spinlock_t c_lock; /* protect msg queues */ u64 c_next_tx_seq; @@ -260,6 +261,10 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) return cookie >> 32; } +/* atomic operation types */ +#define RDS_ATOMIC_TYPE_CSWP 0 +#define RDS_ATOMIC_TYPE_FADD 1 + /* * m_sock_item and m_conn_item are on lists that are serialized under * conn->c_lock. 
m_sock_item has additional meaning in that once it is empty @@ -315,11 +320,27 @@ struct rds_message { struct rds_sock *m_rs; rds_rdma_cookie_t m_rdma_cookie; struct { - struct { + struct rm_atomic_op { + int op_type; + uint64_t op_swap_add; + uint64_t op_compare; + + u32 op_rkey; + u64 op_remote_addr; + unsigned int op_notify:1; + unsigned int op_recverr:1; + unsigned int op_mapped:1; + unsigned int op_active:1; + struct rds_notifier *op_notifier; + struct scatterlist *op_sg; + + struct rds_mr *op_rdma_mr; + } atomic; + struct rm_rdma_op { struct rds_rdma_op m_rdma_op; struct rds_mr *m_rdma_mr; } rdma; - struct { + struct rm_data_op { unsigned int m_nents; unsigned int m_count; struct scatterlist *m_sg; @@ -397,6 +418,7 @@ struct rds_transport { int (*xmit_cong_map)(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset); int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); + int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); int (*recv)(struct rds_connection *conn); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, size_t size); @@ -546,6 +568,8 @@ struct rds_statistics { uint64_t s_cong_update_received; uint64_t s_cong_send_error; uint64_t s_cong_send_blocked; + uint64_t s_atomic_cswp; + uint64_t s_atomic_fadd; }; /* af_rds.c */ @@ -722,7 +746,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rds_rdma_op *ro); -void rds_rdma_send_complete(struct rds_message *rm, int); +void rds_rdma_send_complete(struct rds_message *rm, int wc_status); +void rds_atomic_send_complete(struct rds_message *rm, int wc_status); +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); extern void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) diff --git a/net/rds/send.c b/net/rds/send.c index b751a8e77c41..f3f4e79274bf 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -73,6 +73,7 @@ void rds_send_reset(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; conn->c_map_queued = 0; @@ -171,6 +172,7 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; /* Release the reference to the previous message. */ rds_message_put(rm); @@ -262,6 +264,17 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } + + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); + if (ret) + break; + conn->c_xmit_atomic_sent = 1; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); + } + /* * Try and send an rdma message. 
Let's see if we can * keep this simple and require that the transport either @@ -442,6 +455,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) } EXPORT_SYMBOL_GPL(rds_rdma_send_complete); +/* + * Just like above, except looks at atomic op + */ +void rds_atomic_send_complete(struct rds_message *rm, int status) +{ + struct rds_sock *rs = NULL; + struct rm_atomic_op *ao; + struct rds_notifier *notifier; + + spin_lock(&rm->m_rs_lock); + + ao = &rm->atomic; + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) + && ao->op_active && ao->op_notify && ao->op_notifier) { + notifier = ao->op_notifier; + rs = rm->m_rs; + sock_hold(rds_rs_to_sk(rs)); + + notifier->n_status = status; + spin_lock(&rs->rs_lock); + list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); + spin_unlock(&rs->rs_lock); + + ao->op_notifier = NULL; + } + + spin_unlock(&rm->m_rs_lock); + + if (rs) { + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } +} +EXPORT_SYMBOL_GPL(rds_atomic_send_complete); + /* * This is the same as rds_rdma_send_complete except we * don't do any locking - we have all the ingredients (message, @@ -788,6 +836,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len) /* these are valid but do no add any size */ break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + size += sizeof(struct scatterlist); + break; + default: return -EINVAL; } @@ -813,7 +866,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, continue; /* As a side effect, RDMA_DEST and RDMA_MAP will set - * rm->m_rdma_cookie and rm->m_rdma_mr. + * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr. */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: @@ -829,6 +882,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, if (!ret) *allocated_mr = 1; break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + ret = rds_cmsg_atomic(rs, rm, cmsg); + break; default: return -EINVAL; @@ -926,10 +983,18 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) && - !conn->c_trans->xmit_rdma) { + !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", - &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + ret = -EOPNOTSUPP; + goto out; + } + + if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { + if (printk_ratelimit()) + printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 7598eb07cfb1..c66d95d9c262 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -75,6 +75,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "s_atomic_cswp", + "s_atomic_fadd", }; void rds_stats_info_copy(struct rds_info_iterator *iter, -- cgit v1.2.3 From 2c3a5f9abb1dc5efdab8ba9a568b1661c65fd1e3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 1 Mar 2010 16:10:40 -0800 Subject: RDS: Add flag for silent ops. Do atomic op before RDMA Add a flag to the API so users can indicate they want silent operations. This is needed because silent ops cannot be used with USE_ONCE MRs, so we can't just assume silent. Also, change send_xmit to do atomic op before rdma op if both are present, and centralize the hairy logic to determine if we want to attempt silent, or not. 
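[ Illustration, not part of the patch. To make the flag concrete, here is a minimal userspace sketch of issuing a silent fetch-and-add through the cmsg interface added by the earlier atomic-ops patch, using the RDS_RDMA_SILENT flag added here. The function name is invented, the socket is assumed bound and connected, and error handling is elided:

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <linux/rds.h>

	static ssize_t rds_fadd_silent(int fd, rds_rdma_cookie_t cookie,
				       uint64_t *prev, uint64_t remote_addr)
	{
		struct rds_atomic_args args = {
			.cookie      = cookie,	/* from a prior RDS_GET_MR; must
						 * not be a USE_ONCE MR, since
						 * silent ops forbid those */
			.local_addr  = (uint64_t)(unsigned long)prev,
						/* 8-byte aligned buffer; the old
						 * value lands here on completion */
			.remote_addr = remote_addr,
			.fadd.add    = 1,
			.flags       = RDS_RDMA_SILENT,	/* don't interrupt remote */
		};
		char cbuf[CMSG_SPACE(sizeof(args))] = { 0 };
		struct msghdr msg = {
			.msg_control	= cbuf,
			.msg_controllen	= sizeof(cbuf),
		};
		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

		cmsg->cmsg_level = SOL_RDS;
		cmsg->cmsg_type  = RDS_CMSG_ATOMIC_FADD;
		cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
		memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

		/* 0-byte payload plus a silent op: no data and no header go
		 * out, so the remote side is never interrupted. */
		return sendmsg(fd, &msg, 0);
	}
]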
Signed-off-by: Andy Grover --- include/linux/rds.h | 1 + net/rds/rdma.c | 2 ++ net/rds/rds.h | 2 ++ net/rds/send.c | 55 ++++++++++++++++++++++++++++++----------------------- 4 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 9239152abf7a..109f1d343318 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -276,5 +276,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 5ba514684431..48781fe4431c 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -559,6 +559,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_write = !!(args->flags & RDS_RDMA_READWRITE); op->op_fence = !!(args->flags & RDS_RDMA_FENCE); op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + op->op_silent = !!(args->flags & RDS_RDMA_SILENT); op->op_active = 1; op->op_recverr = rs->rs_recverr; WARN_ON(!nr_pages); @@ -747,6 +748,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); diff --git a/net/rds/rds.h b/net/rds/rds.h index 46d190d08549..23b921000e74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -319,6 +319,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; struct scatterlist *op_sg; struct rds_notifier *op_notifier; @@ -333,6 +334,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; unsigned int op_bytes; unsigned int op_nents; diff --git a/net/rds/send.c b/net/rds/send.c index cdca9747fcbc..38567f3ee7e8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -250,42 +250,50 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } - if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { - ret = conn->c_trans->xmit_atomic(conn, rm); + /* The transport either sends the whole rdma or none of it */ + if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); if (ret) break; - conn->c_xmit_atomic_sent = 1; + conn->c_xmit_rdma_sent = 1; + /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); - - /* - * This is evil, muahaha. - * We permit 0-byte sends. (rds-ping depends on this.) - * BUT if there is an atomic op and no sent data, - * we turn off sending the header, to achieve - * "silent" atomics. - * But see below; RDMA op might toggle this back on! 
- */ - if (rm->data.op_nents == 0) - rm->data.op_active = 0; } - /* The transport either sends the whole rdma or none of it */ - if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { - ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, rm); if (ret) break; - conn->c_xmit_rdma_sent = 1; - - /* rdmas need data sent, even if just the header */ - rm->data.op_active = 1; - + conn->c_xmit_atomic_sent = 1; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); } + /* + * A number of cases require an RDS header to be sent + * even if there is no data. + * We permit 0-byte sends; rds-ping depends on this. + * However, if there are exclusively attached silent ops, + * we skip the hdr/data send, to enable silent operation. + */ + if (rm->data.op_nents == 0) { + int ops_present; + int all_ops_are_silent = 1; + + ops_present = (rm->atomic.op_active || rm->rdma.op_active); + if (rm->atomic.op_active && !rm->atomic.op_silent) + all_ops_are_silent = 0; + if (rm->rdma.op_active && !rm->rdma.op_silent) + all_ops_are_silent = 0; + + if (ops_present && all_ops_are_silent + && !rm->m_rdma_cookie) + rm->data.op_active = 0; + } + if (rm->data.op_active && !conn->c_xmit_data_sent) { ret = conn->c_trans->xmit(conn, rm, conn->c_xmit_hdr_off, @@ -1009,8 +1017,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->rdma.op_active) && - !conn->c_trans->xmit_rdma) { + if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); -- cgit v1.2.3 From 20c72bd5f5f902e5a8745d51573699605bf8d21c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 05:51:28 -0700 Subject: RDS: Implement masked atomic operations Add two CMSGs for masked versions of cswp and fadd. args struct modified to use a union for different atomic op type's arguments. Change IB to do masked atomic ops. Atomic op type in rds_message similarly unionized. 
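[ Illustration, not part of the patch. The masked form strictly generalizes the plain one: as the rdma.c hunk below shows, the unmasked cmsg types are converted to masked hw ops with all-ones masks. With hypothetical values, a masked cswp that atomically sets bit 0 of the remote qword, but only if bit 0 is currently clear and without disturbing the other 63 bits, fills the args like this:

	struct rds_atomic_args args = {
		.m_cswp = {
			.compare      = 0,	/* expect bit 0 clear ... */
			.compare_mask = 0x1,	/* ... comparing only bit 0 */
			.swap         = 0x1,	/* set bit 0 ... */
			.swap_mask    = 0x1,	/* ... leaving the rest alone */
		},
		/* cookie, local_addr, remote_addr and flags as before */
	};

and is sent with cmsg_type RDS_CMSG_MASKED_ATOMIC_CSWP. A plain RDS_CMSG_ATOMIC_CSWP with the same compare/swap values would instead compare and swap the whole qword. ]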
Signed-off-by: Andy Grover --- include/linux/rds.h | 12 ++++++++++++ net/rds/ib_send.c | 14 +++++++++----- net/rds/rdma.c | 33 +++++++++++++++++++++++++++------ net/rds/rds.h | 14 ++++++++++++-- net/rds/send.c | 4 ++++ 5 files changed, 64 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 109f1d343318..a2a5edb4a276 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -75,6 +75,8 @@ #define RDS_CMSG_CONG_UPDATE 5 #define RDS_CMSG_ATOMIC_FADD 6 #define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -251,6 +253,16 @@ struct rds_atomic_args { struct { uint64_t add; } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; }; uint64_t flags; uint64_t user_token; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 808544aebb70..71f373c421bc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -807,13 +807,17 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_compare; - send->s_wr.wr.atomic.swap = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; + send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; send->s_wr.wr.atomic.swap = 0; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_wr.wr.atomic.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.num_sge = 1; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 48781fe4431c..48064673fc76 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -738,13 +738,34 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, args = CMSG_DATA(cmsg); - if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { - rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; - rm->atomic.op_swap_add = args->cswp.swap; - rm->atomic.op_compare = args->cswp.compare; - } else { + /* Nonmasked & masked cmsg ops converted to masked hw ops */ + switch (cmsg->cmsg_type) { + case RDS_CMSG_ATOMIC_FADD: + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_m_fadd.add = args->fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = 0; + break; + case RDS_CMSG_MASKED_ATOMIC_FADD: rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; - rm->atomic.op_swap_add = args->fadd.add; + rm->atomic.op_m_fadd.add = args->m_fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask; + break; + case RDS_CMSG_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->cswp.compare; + rm->atomic.op_m_cswp.swap = args->cswp.swap; + rm->atomic.op_m_cswp.compare_mask = ~0; + rm->atomic.op_m_cswp.swap_mask = ~0; + break; + case RDS_CMSG_MASKED_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->m_cswp.compare; + 
rm->atomic.op_m_cswp.swap = args->m_cswp.swap; + rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask; + rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask; + break; + default: + BUG(); /* should never happen */ } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); diff --git a/net/rds/rds.h b/net/rds/rds.h index aadaddba88a7..8103dcf8b976 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -316,8 +316,18 @@ struct rds_message { struct { struct rm_atomic_op { int op_type; - uint64_t op_swap_add; - uint64_t op_compare; + union { + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } op_m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } op_m_fadd; + }; u32 op_rkey; u64 op_remote_addr; diff --git a/net/rds/send.c b/net/rds/send.c index 81471b25373b..9b951a0ab6b7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -843,6 +843,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len) case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: cmsg_groups |= 1; size += sizeof(struct scatterlist); break; @@ -894,6 +896,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: ret = rds_cmsg_atomic(rs, rm, cmsg); break; -- cgit v1.2.3 From fd128dfa50cfc4f2959dc4aa5d7468d33b988332 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:32:17 -0700 Subject: RDS: Add rds.h to exported headers list Also, a number of changes were made based on the assumption that rds.h wasn't exported, so roll these back. Signed-off-by: Andy Grover --- include/linux/Kbuild | 1 + include/linux/rds.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..c7fbf298ad68 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/rds.h b/include/linux/rds.h index a2a5edb4a276..f371f885a352 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -95,7 +95,7 @@ struct rds_info_counter { u_int8_t name[32]; u_int64_t value; -} __packed; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -110,7 +110,7 @@ struct rds_info_connection { __be32 faddr; u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_flow { __be32 laddr; @@ -118,7 +118,7 @@ struct rds_info_flow { u_int32_t bytes; __be16 lport; __be16 fport; -} __packed; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -131,7 +131,7 @@ struct rds_info_message { __be16 lport; __be16 fport; u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_socket { u_int32_t sndbuf; @@ -141,7 +141,7 @@ struct rds_info_socket { __be16 connected_port; u_int32_t rcvbuf; u_int64_t inum; -} __packed; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; @@ -153,7 +153,7 @@ struct rds_info_tcp_socket { u_int32_t last_sent_nxt; u_int32_t last_expected_una; u_int32_t last_seen_una; -} __packed; +} 
__attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { -- cgit v1.2.3 From a46f561b774d90d8616473d56696e7d44fa1c9f1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:34:10 -0700 Subject: RDS: rds.h: Replace u_int[size]_t with uint[size]_t Replace e.g. u_int32_t types with the more common uint32_t. Reported-by: Matthew Wilcox Signed-off-by: Andy Grover --- include/linux/rds.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index f371f885a352..3576b31b6b7b 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -93,8 +93,8 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; + uint8_t name[32]; + uint64_t value; } __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 @@ -104,18 +104,18 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; } __attribute__((packed)); struct rds_info_flow { __be32 laddr; __be32 faddr; - u_int32_t bytes; + uint32_t bytes; __be16 lport; __be16 fport; } __attribute__((packed)); @@ -124,23 +124,23 @@ struct rds_info_flow { #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; + uint8_t flags; } __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; + uint32_t rcvbuf; + uint64_t inum; } __attribute__((packed)); struct rds_info_tcp_socket { @@ -148,11 +148,11 @@ struct rds_info_tcp_socket { __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; } __attribute__((packed)); #define RDS_IB_GID_LEN 16 @@ -207,38 +207,38 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). 
*/ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; }; struct rds_atomic_args { @@ -269,7 +269,7 @@ struct rds_atomic_args { }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; -- cgit v1.2.3 From 905d64c89e2a9d71d0606904b7c3908633db6072 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 8 Sep 2010 18:03:54 -0700 Subject: RDS: Remove dead struct from rds.h flows are an obsolete date type. Signed-off-by: Andy Grover --- include/linux/rds.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 3576b31b6b7b..91950950aa59 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -112,14 +112,6 @@ struct rds_info_connection { uint8_t flags; } __attribute__((packed)); -struct rds_info_flow { - __be32 laddr; - __be32 faddr; - uint32_t bytes; - __be16 lport; - __be16 fport; -} __attribute__((packed)); - #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 -- cgit v1.2.3 From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 05:08:44 +0000 Subject: udp: add rehash on connect() commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation) added a secondary hash on UDP, hashed on (local addr, local port). Problem is that following sequence : fd = socket(...) connect(fd, &remote, ...) not only selects remote end point (address and port), but also sets local address, while UDP stack stored in secondary hash table the socket while its local address was INADDR_ANY (or ipv6 equivalent) Sequence is : - autobind() : choose a random local port, insert socket in hash tables [while local address is INADDR_ANY] - connect() : set remote address and port, change local address to IP given by a route lookup. When an incoming UDP frame comes, if more than 10 sockets are found in primary hash table, we switch to secondary table, and fail to find socket because its local address changed. One solution to this problem is to rehash datagram socket if needed. We add a new rehash(struct socket *) method in "struct proto", and implement this method for UDP v4 & v6, using a common helper. This rehashing only takes care of secondary hash table, since primary hash (based on local port only) is not changed. Reported-by: Krzysztof Piotr Oledzki Signed-off-by: Eric Dumazet Tested-by: Krzysztof Piotr Oledzki Signed-off-by: David S. 
Miller --- include/net/sock.h | 1 + include/net/udp.h | 1 + net/ipv4/datagram.c | 5 ++++- net/ipv4/udp.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/datagram.c | 7 ++++++- net/ipv6/udp.c | 10 ++++++++++ 6 files changed, 66 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..adab9dc58183 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf305da50..a184d3496b13 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7b..721a8a37b45c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..fb23c2e63b52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc2..ef371aa01ac5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c 
@@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead2..5acb3560ff15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, -- cgit v1.2.3 From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 20 Aug 2010 16:49:43 +0800 Subject: dquot: do full inode dirty in allocating space Alex Shi found a regression when doing ffsb test. The test has several threads, and each thread creates a small file, write to it and then delete it. ffsb reports about 20% regression and Alex bisected it to 43d2932d88e4. The test will call __mark_inode_dirty 3 times. without this commit, we only take inode_lock one time, while with it, we take the lock 3 times with flags ( I_DIRTY_SYNC,I_DIRTY_PAGES,I_DIRTY). Perf shows the lock contention increased too much. Below proposed patch fixes it. fs is allocating blocks, which usually means file writes and the inode will be dirtied soon. We fully dirty the inode to reduce some inode_lock contention in several calls of __mark_inode_dirty. Jan Kara: Added comment. Signed-off-by: Shaohua Li Signed-off-by: Alex Shi Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba858cfe0..d1a9193960f1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. + */ + mark_inode_dirty(inode); + } return ret; } -- cgit v1.2.3 From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that. 
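[ Illustration, not part of the patch. With the helper exported in mm.h, any caller can distinguish a genuine stack guard page from a VMA that merely abuts the stack above it. A minimal sketch (function name invented), mirroring the existing mlock.c user:

	#include <linux/mm.h>

	static inline int is_stack_guard_page(struct vm_area_struct *vma,
					      unsigned long addr)
	{
		/* A guard page exists at addr only if the VMA grows down,
		 * starts exactly at addr, and is not simply a continuation
		 * of the stack VMA sitting directly above it. */
		return (vma->vm_flags & VM_GROWSDOWN) &&
		       (vma->vm_start == addr) &&
		       !vma_stack_continue(vma->vm_prev, addr);
	}
]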
Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mm.h | 6 ++++++ mm/mlock.c | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210772ce..74949fbef8c6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/mlock.c b/mm/mlock.c index cbae7c5b9568..b70919ce4f72 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page) } } -/* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { return (vma->vm_flags & VM_GROWSDOWN) && -- cgit v1.2.3 From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 8 Sep 2010 16:54:54 -0600 Subject: lglock: make lg_lock_global() actually lock globally lg_lock_global() currently only acquires spinlocks for online CPUs, but it's meant to lock all possible CPUs. Lglock-protected resources may be associated with removed CPUs - and, indeed, that could happen with the per-superblock open files lists. At Nick's suggestion, change for_each_online_cpu() to for_each_possible_cpu() to protect accesses to those resources. 
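As a sketch of the effect, here is the name##_global_lock() macro from the hunk below hand-expanded for a hypothetical lglock named "files" (the real code stays a macro; the identifiers files_global_lock, files_lock and files_lock_dep_map are illustrative expansions only):

void files_global_lock(void)
{
	int i;

	preempt_disable();
	rwlock_acquire(&files_lock_dep_map, 0, 0, _RET_IP_);
	for_each_possible_cpu(i) {	/* was for_each_online_cpu(i) */
		arch_spinlock_t *lock;

		lock = &per_cpu(files_lock, i);
		arch_spin_lock(lock);
	}
}

With this, per-CPU entries left behind by a CPU that has since gone offline remain covered by the global lock.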
Cc: Al Viro Acked-by: Nick Piggin Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb713b90..f549056fb20b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ -- cgit v1.2.3 From 01a08546af311c065f34727787dd0cc8dc0c216f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 Aug 2010 10:28:16 +0200 Subject: sched: Add book scheduling domain On top of the SMT and MC scheduling domains this adds the BOOK scheduling domain. This is useful for NUMA-like machines which do not have an interface telling which piece of memory is attached to which node, or where the hardware performs striping. Signed-off-by: Heiko Carstens Signed-off-by: Peter Zijlstra LKML-Reference: <20100831082844.253053798@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/topology.h | 6 ++++ kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..b51c53c285b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, SD_LV_MC, + SD_LV_BOOK, SD_LV_CPU, SD_LV_NODE, SD_LV_ALLNODES, diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5c..b91a40e847d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifdef CONFIG_SCHED_BOOK +#ifndef SD_BOOK_INIT +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_SCHED_BOOK */ + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff --git a/kernel/sched.c b/kernel/sched.c index 1a0c084b1cf9..26f83e2f1534 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6506,6 +6506,7 @@ struct s_data { cpumask_var_t nodemask; cpumask_var_t this_sibling_map; cpumask_var_t this_core_map; + cpumask_var_t this_book_map; cpumask_var_t send_covered; cpumask_var_t tmpmask; struct sched_group **sched_group_nodes; @@ -6517,6 +6518,7 @@ enum s_alloc { sa_rootdomain, sa_tmpmask, sa_send_covered, + sa_this_book_map, sa_this_core_map, sa_this_sibling_map, sa_nodemask, @@ -6570,6 +6572,31 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map, } #endif /* CONFIG_SCHED_MC */ +/* + * book sched-domains: + */ +#ifdef CONFIG_SCHED_BOOK +static DEFINE_PER_CPU(struct static_sched_domain, book_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); + +static int +cpu_to_book_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) +{ + int group = cpu; +#ifdef CONFIG_SCHED_MC + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_SMT) + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#endif + if (sg) + *sg = &per_cpu(sched_group_book, group).sg; + return group; +} +#endif /* CONFIG_SCHED_BOOK */ + static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); @@ -6578,7 +6605,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, struct sched_group **sg, struct cpumask *mask) { int group; -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_BOOK + cpumask_and(mask, cpu_book_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_MC) cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) @@ -6839,6 +6869,9 @@ SD_INIT_FUNC(CPU) #ifdef CONFIG_SCHED_MC SD_INIT_FUNC(MC) #endif +#ifdef CONFIG_SCHED_BOOK + SD_INIT_FUNC(BOOK) +#endif static int default_relax_domain_level = -1; @@ -6888,6 +6921,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, free_cpumask_var(d->tmpmask); /* fall through */ case sa_send_covered: free_cpumask_var(d->send_covered); /* fall through */ + case sa_this_book_map: + free_cpumask_var(d->this_book_map); /* fall through */ case sa_this_core_map: free_cpumask_var(d->this_core_map); /* fall through */ case sa_this_sibling_map: @@ -6934,8 +6969,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, return sa_nodemask; if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) return sa_this_sibling_map; - if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) return sa_this_core_map; + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + return sa_this_book_map; if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) return sa_send_covered; d->rd = alloc_rootdomain(); @@ -6993,6 +7030,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, return sd; } +static struct sched_domain *__build_book_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_BOOK + sd = &per_cpu(book_domains, i).sd; + SD_INIT(sd, BOOK); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); + sd->parent = parent; + parent->child = sd; + 
cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); +#endif + return sd; +} + static struct sched_domain *__build_mc_sched_domain(struct s_data *d, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *parent, int i) @@ -7049,6 +7103,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, &cpu_to_core_group, d->send_covered, d->tmpmask); break; +#endif +#ifdef CONFIG_SCHED_BOOK + case SD_LV_BOOK: /* set up book groups */ + cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); + if (cpu == cpumask_first(d->this_book_map)) + init_sched_build_groups(d->this_book_map, cpu_map, + &cpu_to_book_group, + d->send_covered, d->tmpmask); + break; #endif case SD_LV_CPU: /* set up physical groups */ cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); @@ -7097,12 +7160,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = __build_numa_sched_domains(&d, cpu_map, attr, i); sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); } for_each_cpu(i, cpu_map) { build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); + build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); build_sched_groups(&d, SD_LV_MC, cpu_map, i); } @@ -7133,6 +7198,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, init_sched_groups_power(i, sd); } #endif +#ifdef CONFIG_SCHED_BOOK + for_each_cpu(i, cpu_map) { + sd = &per_cpu(book_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif for_each_cpu(i, cpu_map) { sd = &per_cpu(phys_domains, i).sd; @@ -7158,6 +7229,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(cpu_domains, i).sd; #elif defined(CONFIG_SCHED_MC) sd = &per_cpu(core_domains, i).sd; +#elif defined(CONFIG_SCHED_BOOK) + sd = &per_cpu(book_domains, i).sd; #else sd = &per_cpu(phys_domains, i).sd; #endif -- cgit v1.2.3 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 4 ++-- arch/arm/kernel/perf_event.c | 2 +- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- arch/sh/kernel/perf_event.c | 4 ++-- arch/sparc/kernel/perf_event.c | 10 +++++----- arch/x86/kernel/cpu/perf_event.c | 14 +++++++------- include/linux/perf_event.h | 10 +++++----- kernel/perf_event.c | 26 +++++++++++++------------- 9 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 51c39fa41693..56fa41590381 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,7 +642,7 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = alpha_pmu_enable, .disable = alpha_pmu_disable, .read = alpha_pmu_read, @@ -653,7 +653,7 @@ static const struct pmu pmu = { /* * Main entry point to initialise a HW performance event. 
*/ -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 64ca8c3ab94b..0671e92c5111 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -491,7 +491,7 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * +struct pmu * hw_perf_event_init(struct perf_event *event) { int err = 0; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..5f78681ad902 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -857,7 +857,7 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -870,7 +870,7 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -882,7 +882,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1014,7 +1014,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; unsigned long flags; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..d7619b5e7a6e 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -428,7 +428,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc3567258..395572c94c6a 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,13 +257,13 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sh_pmu_enable, .disable = sh_pmu_disable, .read = sh_pmu_read, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); if (unlikely(err)) { diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 4bc402938575..481b894a5018 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1099,7 +1099,7 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1111,7 
+1111,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1123,7 +1123,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1142,7 +1142,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,7 +1152,7 @@ static const struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index de6569c04cd0..fdd97f2e9961 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -618,7 +618,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -1427,7 +1427,7 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1440,7 +1440,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1457,7 +1457,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1483,7 +1483,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, .start = x86_pmu_start, @@ -1569,9 +1569,9 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; err = __hw_perf_event_init(event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000610c4de71..09d048b52115 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -578,19 +578,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need * to do schedulability tests. */ - void (*start_txn) (const struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. 
On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->disable() is called for * each successfull ->enable() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); }; /** @@ -669,7 +669,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -849,7 +849,7 @@ struct perf_output_handle { */ extern int perf_max_events; -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2d74f31220ad..fb46fd13f31f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -75,7 +75,7 @@ static DEFINE_SPINLOCK(perf_resource_lock); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) { return NULL; } @@ -691,7 +691,7 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - const struct pmu *pmu = group_event->pmu; + struct pmu *pmu = group_event->pmu; bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) @@ -4501,7 +4501,7 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static const struct pmu perf_ops_generic = { +static struct pmu perf_ops_generic = { .enable = perf_swevent_enable, .disable = perf_swevent_disable, .start = perf_swevent_int, @@ -4614,7 +4614,7 @@ static void cpu_clock_perf_event_read(struct perf_event *event) cpu_clock_perf_event_update(event); } -static const struct pmu perf_ops_cpu_clock = { +static struct pmu perf_ops_cpu_clock = { .enable = cpu_clock_perf_event_enable, .disable = cpu_clock_perf_event_disable, .read = cpu_clock_perf_event_read, @@ -4671,7 +4671,7 @@ static void task_clock_perf_event_read(struct perf_event *event) task_clock_perf_event_update(event, time); } -static const struct pmu perf_ops_task_clock = { +static struct pmu perf_ops_task_clock = { .enable = task_clock_perf_event_enable, .disable = task_clock_perf_event_disable, .read = task_clock_perf_event_read, @@ -4785,7 +4785,7 @@ static int swevent_hlist_get(struct perf_event *event) #ifdef CONFIG_EVENT_TRACING -static const struct pmu perf_ops_tracepoint = { +static struct pmu perf_ops_tracepoint = { .enable = perf_trace_enable, .disable = perf_trace_disable, .start = perf_swevent_int, @@ -4849,7 +4849,7 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { int err; @@ -4896,7 +4896,7 @@ static void perf_event_free_filter(struct perf_event *event) #else -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } @@ -4918,7 +4918,7 @@ static void bp_perf_event_destroy(struct perf_event *event) release_bp_slot(event); } -static const struct pmu 
*bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { int err; @@ -4942,7 +4942,7 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_swevent_add(bp, 1, 1, &sample, regs); } #else -static const struct pmu *bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { return NULL; } @@ -4964,9 +4964,9 @@ static void sw_perf_event_destroy(struct perf_event *event) swevent_hlist_put(event); } -static const struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu *sw_perf_event_init(struct perf_event *event) { - const struct pmu *pmu = NULL; + struct pmu *pmu = NULL; u64 event_id = event->attr.config; /* @@ -5028,7 +5028,7 @@ perf_event_alloc(struct perf_event_attr *attr, perf_overflow_handler_t overflow_handler, gfp_t gfpflags) { - const struct pmu *pmu; + struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; long err; -- cgit v1.2.3 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu; this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 37 +- arch/arm/kernel/perf_event.c | 38 +- arch/powerpc/kernel/perf_event.c | 46 +-- arch/powerpc/kernel/perf_event_fsl_emb.c | 37 +- arch/sh/kernel/perf_event.c | 35 +- arch/sparc/kernel/perf_event.c | 29 +- arch/x86/kernel/cpu/perf_event.c | 45 ++- include/linux/perf_event.h | 10 +- kernel/hw_breakpoint.c | 35 +- kernel/perf_event.c | 588 +++++++++++++++---------------- 10 files changed, 488 insertions(+), 412 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 56fa41590381..19660b5c298f 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,34 +642,39 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - +static struct pmu pmu = { + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; /* * Main entry point - enable HW performance counters.
@@ -838,5 +843,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 0671e92c5111..f62f9db35db3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -306,12 +306,7 @@ out: return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { if (atomic_read(&active_events) > perf_max_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } +static struct pmu pmu = { + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + void hw_perf_enable(void) { @@ -2994,6 +3006,8 @@ init_hw_perf_events(void) perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. 
@@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. 
*/ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. 
Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 395572c94c6a..8cb206597e0c 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = sh_pmu_event_init, + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) -{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); @@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu) WARN_ON(pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 481b894a5018..bed4327f5a7a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1025,7 +1025,7 @@ out: return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else + break; + + case PERF_TYPE_RAW: return -EOPNOTSUPP; + default: + return -ENOENT; + + } + /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) @@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { + .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,15 +1163,6 @@ static struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void) /* All sparc64 PMUs currently have 2 events. 
*/ perf_max_events = 2; + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdd97f2e9961..2c89264ee791 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) return 0; } -static struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1569,12 +1558,22 @@ out: return ret; } -struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = x86_pmu_event_init, + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + /* * callchain support */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b52115..ab72f56eb372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index d71a987fd2bf..e9c5cfa1fd20 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct 
perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static struct pmu perf_breakpoint = { + .event_init = hw_breakpoint_event_init, + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fb46fd13f31f..288ce43de57c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -72,14 +71,6 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) -{ - return NULL; -} - void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } @@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static struct pmu perf_ops_generic = { - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, - .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ -}; - -/* - * hrtimer based swevent callback - */ - -static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_event *event; - u64 period; - - event = container_of(hrtimer, struct perf_event, hw.hrtimer); - event->pmu->read(event); - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs = get_irq_regs(); - - if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, event->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -static void perf_swevent_start_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period; - - if (hwc->remaining) { - if (hwc->remaining < 0) - period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } -} - -static void perf_swevent_cancel_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->sample_period) { - ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = 
ktime_to_ns(remaining); - - hrtimer_cancel(&hwc->hrtimer); - } -} - -/* - * Software event: cpu wall time clock - */ - -static void cpu_clock_perf_event_update(struct perf_event *event) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = local64_xchg(&event->hw.prev_count, now); - local64_add(now - prev, &event->count); -} - -static int cpu_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void cpu_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - cpu_clock_perf_event_update(event); -} - -static void cpu_clock_perf_event_read(struct perf_event *event) -{ - cpu_clock_perf_event_update(event); -} - -static struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_event_enable, - .disable = cpu_clock_perf_event_disable, - .read = cpu_clock_perf_event_read, -}; - -/* - * Software event: task time clock - */ - -static void task_clock_perf_event_update(struct perf_event *event, u64 now) -{ - u64 prev; - s64 delta; - - prev = local64_xchg(&event->hw.prev_count, now); - delta = now - prev; - local64_add(delta, &event->count); -} - -static int task_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void task_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - task_clock_perf_event_update(event, event->ctx->time); - -} - -static void task_clock_perf_event_read(struct perf_event *event) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } - - task_clock_perf_event_update(event, time); -} - -static struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_event_enable, - .disable = task_clock_perf_event_disable, - .read = task_clock_perf_event_read, -}; - /* Deref the hlist from the update side */ static inline struct swevent_hlist * swevent_hlist_deref(struct perf_cpu_context *cpuctx) @@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event) return err; } -#ifdef CONFIG_EVENT_TRACING +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -static struct pmu perf_ops_tracepoint = { - .enable = perf_trace_enable, - .disable = perf_trace_disable, +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); + swevent_hlist_put(event); +} + +static int perf_swevent_init(struct perf_event *event) +{ + int event_id = event->attr.config; + + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: + return -ENOENT; + + default: + break; + } + + if (event_id > PERF_COUNT_SW_MAX) + return -ENOENT; + + if (!event->parent) { + int err; + + err = swevent_hlist_get(event); + if (err) + return err; + + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + + return 0; +} + +static struct pmu perf_swevent = { + .event_init = perf_swevent_init, + .enable = perf_swevent_enable, + 
.disable = perf_swevent_disable, .start = perf_swevent_int, .stop = perf_swevent_void, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, + .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; +#ifdef CONFIG_EVENT_TRACING + static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { @@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static struct pmu *tp_perf_event_init(struct perf_event *event) +static int perf_tp_event_init(struct perf_event *event) { int err; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -ENOENT; + /* * Raw tracepoint data is a severe data leak, only allow root to * have these. @@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event) if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); + return -EPERM; err = perf_trace_init(event); if (err) - return NULL; + return err; event->destroy = tp_perf_event_destroy; - return &perf_ops_tracepoint; + return 0; +} + +static struct pmu perf_tracepoint = { + .event_init = perf_tp_event_init, + .enable = perf_trace_enable, + .disable = perf_trace_disable, + .start = perf_swevent_int, + .stop = perf_swevent_void, + .read = perf_swevent_read, + .unthrottle = perf_swevent_void, +}; + +static inline void perf_tp_register(void) +{ + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event) #else -static struct pmu *tp_perf_event_init(struct perf_event *event) +static inline void perf_tp_register(void) { - return NULL; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) +void perf_bp_event(struct perf_event *bp, void *data) { - release_bp_slot(event); + struct perf_sample_data sample; + struct pt_regs *regs = data; + + perf_sample_data_init(&sample, bp->attr.bp_addr); + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); } +#endif + +/* + * hrtimer based swevent callback + */ -static struct pmu *bp_perf_event_init(struct perf_event *bp) +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { - int err; + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; - err = register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs = get_irq_regs(); + + if (regs && !perf_exclude_event(event, regs)) { + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } - bp->destroy = bp_perf_event_destroy; + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - return &perf_ops_bp; + return ret; } -void perf_bp_event(struct perf_event *bp, void *data) +static void perf_swevent_start_hrtimer(struct perf_event *event) { - struct perf_sample_data sample; - struct pt_regs *regs = data; + struct 
hw_perf_event *hwc = &event->hw; - perf_sample_data_init(&sample, bp->attr.bp_addr); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period; - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (hwc->remaining) { + if (hwc->remaining < 0) + period = 10000; + else + period = hwc->remaining; + hwc->remaining = 0; + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } } -#else -static struct pmu *bp_perf_event_init(struct perf_event *bp) + +static void perf_swevent_cancel_hrtimer(struct perf_event *event) { - return NULL; + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + hwc->remaining = ktime_to_ns(remaining); + + hrtimer_cancel(&hwc->hrtimer); + } } -void perf_bp_event(struct perf_event *bp, void *regs) +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_event_update(struct perf_event *event) { + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } -#endif -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +static int cpu_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); -static void sw_perf_event_destroy(struct perf_event *event) + local64_set(&hwc->prev_count, cpu_clock(cpu)); + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void cpu_clock_event_disable(struct perf_event *event) { - u64 event_id = event->attr.config; + perf_swevent_cancel_hrtimer(event); + cpu_clock_event_update(event); +} - WARN_ON(event->parent); +static void cpu_clock_event_read(struct perf_event *event) +{ + cpu_clock_event_update(event); +} - atomic_dec(&perf_swevent_enabled[event_id]); - swevent_hlist_put(event); +static int cpu_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) + return -ENOENT; + + return 0; } -static struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu perf_cpu_clock = { + .event_init = cpu_clock_event_init, + .enable = cpu_clock_event_enable, + .disable = cpu_clock_event_disable, + .read = cpu_clock_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_event_update(struct perf_event *event, u64 now) { - struct pmu *pmu = NULL; - u64 event_id = event->attr.config; + u64 prev; + s64 delta; - /* - * Software events (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; + prev = local64_xchg(&event->hw.prev_count, now); + delta = now - prev; + local64_add(delta, &event->count); +} - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu event, - * use the cpu_clock event instead. 
- */ - if (event->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; +static int task_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - case PERF_COUNT_SW_ALIGNMENT_FAULTS: - case PERF_COUNT_SW_EMULATION_FAULTS: - if (!event->parent) { - int err; - - err = swevent_hlist_get(event); - if (err) - return ERR_PTR(err); + now = event->ctx->time; - atomic_inc(&perf_swevent_enabled[event_id]); - event->destroy = sw_perf_event_destroy; + local64_set(&hwc->prev_count, now); + + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void task_clock_event_disable(struct perf_event *event) +{ + perf_swevent_cancel_hrtimer(event); + task_clock_event_update(event, event->ctx->time); + +} + +static void task_clock_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_event_update(event, time); +} + +static int task_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) + return -ENOENT; + + return 0; +} + +static struct pmu perf_task_clock = { + .event_init = task_clock_event_init, + .enable = task_clock_event_enable, + .disable = task_clock_event_disable, + .read = task_clock_event_read, +}; + +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + +int perf_pmu_register(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_add_rcu(&pmu->entry, &pmus); + mutex_unlock(&pmus_lock); + + return 0; +} + +void perf_pmu_unregister(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_del_rcu(&pmu->entry); + mutex_unlock(&pmus_lock); + + synchronize_srcu(&pmus_srcu); +} + +struct pmu *perf_init_event(struct perf_event *event) +{ + struct pmu *pmu = NULL; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + int ret = pmu->event_init(event); + if (!ret) + break; + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + break; } - pmu = &perf_ops_generic; - break; } + srcu_read_unlock(&pmus_srcu, idx); return pmu; } @@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_event_init(event); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_event_init(event); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_event_init(event); - break; + pmu = perf_init_event(event); - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - - default: - break; - } done: err = 0; if (!pmu) @@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - void __init perf_event_init(void) { perf_event_init_all_cpus(); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); + init_srcu_struct(&pmus_srcu); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); + perf_tp_register(); + perf_cpu_notifier(perf_cpu_notify); } static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, -- cgit v1.2.3 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event_fsl_emb.c | 8 +++++-- arch/sh/kernel/perf_event.c | 11 +++++++--- arch/sparc/kernel/perf_event.c | 3 +++ arch/x86/kernel/cpu/perf_event.c | 22 ++++++++++++------- include/linux/perf_event.h | 20 ++++++++--------- kernel/perf_event.c | 37 +------------------------------- 8 files changed, 48 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f62f9db35db3..afc92c580d18 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,6 +277,8 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; + perf_disable(); + /* If we don't have a space for the counter then finish early. 
*/ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -303,6 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: + perf_enable(); return err; } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 19131b2614b9..c1408821dbc2 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -861,6 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -875,6 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -901,6 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index ea6a804e43fd..9bc84a7fd901 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -262,7 +262,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static int fsl_emb_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuhw; @@ -271,6 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_disable(); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -310,15 +311,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_enable(); return ret; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static void fsl_emb_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_disable(); if (i < 0) goto out; @@ -346,6 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_enable(); put_cpu_var(cpu_hw_events); } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 8cb206597e0c..d042989ceb45 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -230,11 +230,14 @@ static int sh_pmu_enable(struct perf_event *event) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_disable(); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; set_bit(idx, cpuc->used_mask); hwc->idx = idx; @@ -248,8 +251,10 @@ static int sh_pmu_enable(struct perf_event *event) sh_pmu->enable(hwc, idx); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static void sh_pmu_read(struct perf_event *event) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index bed4327f5a7a..d0131deeeaf6 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1113,6 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1126,6 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events 
*cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -1149,6 +1151,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2c89264ee791..846070ce49c3 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -969,10 +969,11 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_disable(); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; /* * If group events scheduling transaction was started, @@ -980,23 +981,26 @@ static int x86_pmu_enable(struct perf_event *event) * at commit time(->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static int x86_pmu_start(struct perf_event *event) @@ -1432,6 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1451,6 +1456,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_enable(); } /* @@ -1480,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_enable(); return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ab72f56eb372..243286a8ded7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -564,26 +564,26 @@ struct pmu { struct list_head entry; /* - * Should return -ENOENT when the @event doesn't match this pmu + * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); + int (*start) (struct perf_event *event); void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Start the transaction, after this ->enable() doesn't need to + * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* @@ -594,8 +594,8 @@ struct pmu { */ int (*commit_txn) (struct pmu *pmu); /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. 
+ * Will cancel the transaction, assumes ->disable() is called + * for each successfull ->enable() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 149ca18371b7..9a98ce953561 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -478,11 +478,6 @@ static void __perf_event_remove_from_context(void *info) return; raw_spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. - */ - perf_disable(); event_sched_out(event, cpuctx, ctx); @@ -498,7 +493,6 @@ static void __perf_event_remove_from_context(void *info) perf_max_events - perf_reserved_percpu); } - perf_enable(); raw_spin_unlock(&ctx->lock); } @@ -803,12 +797,6 @@ static void __perf_install_in_context(void *info) ctx->is_active = 1; update_context_time(ctx); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. NOP for non NMI based events. - */ - perf_disable(); - add_event_to_ctx(event, ctx); if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -850,8 +838,6 @@ static void __perf_install_in_context(void *info) cpuctx->max_pertask--; unlock: - perf_enable(); - raw_spin_unlock(&ctx->lock); } @@ -972,12 +958,10 @@ static void __perf_event_enable(void *info) if (!group_can_go_on(event, cpuctx, 1)) { err = -EEXIST; } else { - perf_disable(); if (event == leader) err = group_sched_in(event, cpuctx, ctx); else err = event_sched_in(event, cpuctx, ctx); - perf_enable(); } if (err) { @@ -1090,9 +1074,8 @@ static void ctx_sched_out(struct perf_event_context *ctx, goto out; update_context_time(ctx); - perf_disable(); if (!ctx->nr_active) - goto out_enable; + goto out; if (event_type & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) @@ -1103,9 +1086,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - - out_enable: - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1364,8 +1344,6 @@ ctx_sched_in(struct perf_event_context *ctx, ctx->timestamp = perf_clock(); - perf_disable(); - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
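(Aside: the two-pass order these ctx_sched_in() hunks preserve is easier to see outside diff form. The sketch below is illustrative only -- it inlines what the kernel does in its ctx_pinned_sched_in()/ctx_flexible_sched_in() helpers and omits the error handling: pinned groups get first pick of the counters, flexible groups take whatever is left and are the ones that get rotated.)

/* Illustrative sketch, not the kernel code: pinned before flexible. */
static void ctx_sched_in_sketch(struct perf_event_context *ctx,
				struct perf_cpu_context *cpuctx,
				enum event_type_t event_type)
{
	struct perf_event *event;

	ctx->timestamp = perf_clock();

	/* Pass 1: pinned groups must go on; a failure puts the event in error. */
	if (event_type & EVENT_PINNED)
		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
			group_sched_in(event, cpuctx, ctx);

	/* Pass 2: flexible groups are best effort and rotated over time. */
	if (event_type & EVENT_FLEXIBLE)
		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
			group_sched_in(event, cpuctx, ctx);
}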
@@ -1377,7 +1355,6 @@ ctx_sched_in(struct perf_event_context *ctx, if (event_type & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1425,8 +1402,6 @@ void perf_event_task_sched_in(struct task_struct *task) if (cpuctx->task_ctx == ctx) return; - perf_disable(); - /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -1439,8 +1414,6 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; - - perf_enable(); } #define MAX_INTERRUPTS (~0ULL) @@ -1555,11 +1528,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_disable(); perf_event_stop(event); local64_set(&hwc->period_left, 0); perf_event_start(event); - perf_enable(); } } @@ -1588,15 +1559,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - perf_disable(); event->pmu->unthrottle(event); - perf_enable(); } if (!event->attr.freq || !event->attr.sample_freq) continue; - perf_disable(); event->pmu->read(event); now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; @@ -1604,7 +1572,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (delta > 0) perf_adjust_period(event, TICK_NSEC, delta); - perf_enable(); } raw_spin_unlock(&ctx->lock); } @@ -1647,7 +1614,6 @@ void perf_event_task_tick(struct task_struct *curr) if (!rotate) return; - perf_disable(); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_out(ctx, EVENT_FLEXIBLE); @@ -1659,7 +1625,6 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_in(curr, EVENT_FLEXIBLE); - perf_enable(); } static int event_enable_on_exec(struct perf_event *event, -- cgit v1.2.3 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 30 +++++++++++++------------ arch/arm/kernel/perf_event.c | 28 +++++++++++------------ arch/powerpc/kernel/perf_event.c | 24 +++++++++++--------- arch/powerpc/kernel/perf_event_fsl_emb.c | 18 ++++++++------- arch/sh/kernel/perf_event.c | 38 +++++++++++++++++--------------- arch/sparc/kernel/perf_event.c | 20 +++++++++-------- arch/x86/kernel/cpu/perf_event.c | 16 ++++++++------ include/linux/perf_event.h | 13 ++++++----- kernel/perf_event.c | 31 ++++++++++++++++---------- 9 files changed, 119 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 19660b5c298f..3e260731f8e6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -435,7 +435,7 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. 
*/ - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); /* Default to error to be returned */ @@ -456,7 +456,7 @@ static int alpha_pmu_enable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -474,7 +474,7 @@ static void alpha_pmu_disable(struct perf_event *event) unsigned long flags; int j; - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); for (j = 0; j < cpuc->n_events; j++) { @@ -502,7 +502,7 @@ static void alpha_pmu_disable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); } @@ -668,18 +668,10 @@ static int alpha_pmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -705,7 +697,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. */ -void hw_perf_disable(void) +static void alpha_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -718,6 +710,16 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_pmu_enable, + .pmu_disable = alpha_pmu_pmu_disable, + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; + /* * Main entry point - don't know when this is called but it diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index afc92c580d18..3343f3f4b973 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,7 +277,7 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; - perf_disable(); + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); @@ -305,7 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: - perf_enable(); + perf_pmu_enable(event->pmu); return err; } @@ -534,16 +534,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; - -void -hw_perf_enable(void) +static void armpmu_pmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -564,13 +555,22 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_pmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_pmu_enable, + .pmu_disable= armpmu_pmu_disable, + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. 
* diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c1408821dbc2..deb84bbcb0e6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -517,7 +517,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +565,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -735,7 +735,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -769,7 +769,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -784,7 +784,7 @@ static void power_pmu_disable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,7 +821,7 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -837,7 +837,7 @@ static void power_pmu_unthrottle(struct perf_event *event) if (!event->hw.idx || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -848,7 +848,7 @@ static void power_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -861,7 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -876,7 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -903,7 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1131,6 +1131,8 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { + .pmu_enable = power_pmu_pmu_enable, + .pmu_disable = power_pmu_pmu_disable, .event_init = power_pmu_event_init, .enable = power_pmu_enable, .disable = power_pmu_disable, diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 9bc84a7fd901..84b1974c628f 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -177,7 +177,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. 
*/ -void hw_perf_disable(void) +static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +216,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -271,7 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; - perf_disable(); + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -311,7 +311,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -321,7 +321,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuhw; int i = event->hw.idx; - perf_disable(); + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -349,7 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: - perf_enable(); + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } @@ -367,7 +367,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) if (event->hw.idx < 0 || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); fsl_emb_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -378,7 +378,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -524,6 +524,8 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_pmu_enable, + .pmu_disable = fsl_emb_pmu_pmu_disable, .event_init = fsl_emb_pmu_event_init, .enable = fsl_emb_pmu_enable, .disable = fsl_emb_pmu_disable, diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index d042989ceb45..4bbe19058a58 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -232,7 +232,7 @@ static int sh_pmu_enable(struct perf_event *event) int idx = hwc->idx; int ret = -EAGAIN; - perf_disable(); + perf_pmu_disable(event->pmu); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); @@ -253,7 +253,7 @@ static int sh_pmu_enable(struct perf_event *event) perf_event_update_userpage(event); ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -285,7 +285,25 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } +static void sh_pmu_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); +} + static struct pmu pmu = { + .pmu_enable = sh_pmu_pmu_enable, + .pmu_disable = sh_pmu_pmu_disable, .event_init = sh_pmu_event_init, .enable = sh_pmu_enable, .disable = sh_pmu_disable, @@ -316,22 +334,6 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - int 
__cpuinit register_sh_pmu(struct sh_pmu *pmu) { if (sh_pmu) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d0131deeeaf6..37cae676536c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -664,7 +664,7 @@ out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +691,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -718,7 +718,7 @@ static void sparc_pmu_disable(struct perf_event *event) int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -748,7 +748,7 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -991,7 +991,7 @@ static int sparc_pmu_enable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= perf_max_events) @@ -1020,7 +1020,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -1113,7 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1127,7 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1151,11 +1151,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } static struct pmu pmu = { + .pmu_enable = sparc_pmu_pmu_enable, + .pmu_disable = sparc_pmu_pmu_disable, .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 846070ce49c3..79705ac45019 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -803,7 +803,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, static int x86_pmu_start(struct perf_event *event); static void x86_pmu_stop(struct perf_event *event); -void hw_perf_enable(void) +static void x86_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; ret = n = collect_events(cpuc, event, false); if (ret < 0) @@ -999,7 +999,7 @@ done_collect: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -1436,7 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1456,7 +1456,7 @@ static void 
x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1486,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1605,6 +1605,8 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { + .pmu_enable = x86_pmu_pmu_enable, + .pmu_disable = x86_pmu_pmu_disable, .event_init = x86_pmu_event_init, .enable = x86_pmu_enable, .disable = x86_pmu_disable, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 243286a8ded7..6abf103fb7f8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -563,6 +563,11 @@ struct perf_event; struct pmu { struct list_head entry; + int *pmu_disable_count; + + void (*pmu_enable) (struct pmu *pmu); + void (*pmu_disable) (struct pmu *pmu); + /* * Should return -ENOENT when the @event doesn't match this PMU. */ @@ -868,10 +873,8 @@ extern void perf_event_free_task(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -1056,8 +1059,6 @@ static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9a98ce953561..5ed0c06765bb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -71,23 +71,20 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - void __weak perf_event_print_debug(void) { } -static DEFINE_PER_CPU(int, perf_disable_count); - -void perf_disable(void) +void perf_pmu_disable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); } -void perf_enable(void) +void perf_pmu_enable(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } static void get_ctx(struct perf_event_context *ctx) @@ -4970,11 +4967,19 @@ static struct srcu_struct pmus_srcu; int perf_pmu_register(struct pmu *pmu) { + int ret; + mutex_lock(&pmus_lock); + ret = -ENOMEM; + pmu->pmu_disable_count = alloc_percpu(int); + if (!pmu->pmu_disable_count) + goto unlock; list_add_rcu(&pmu->entry, &pmus); + ret = 0; +unlock: mutex_unlock(&pmus_lock); - return 0; + return ret; } void perf_pmu_unregister(struct pmu *pmu) @@ -4984,6 +4989,8 @@ void perf_pmu_unregister(struct pmu *pmu) 
mutex_unlock(&pmus_lock); synchronize_srcu(&pmus_srcu); + + free_percpu(pmu->pmu_disable_count); } struct pmu *perf_init_event(struct perf_event *event) -- cgit v1.2.3 From ad5133b7030d04ce7701aa7cbe98f561347c79c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jun 2010 12:22:39 +0200 Subject: perf: Default PMU ops Provide default implementations for the pmu txn methods, this allows us to remove some conditional code. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++---- kernel/perf_event.c | 64 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6abf103fb7f8..bf85733597ec 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,8 +565,8 @@ struct pmu { int *pmu_disable_count; - void (*pmu_enable) (struct pmu *pmu); - void (*pmu_disable) (struct pmu *pmu); + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Should return -ENOENT when the @event doesn't match this PMU. @@ -590,19 +590,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->disable() is called * for each successfull ->enable() during the transaction. 
*/ - void (*cancel_txn) (struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5ed0c06765bb..8ef4ba3bcb1f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -674,21 +674,14 @@ group_sched_in(struct perf_event *group_event, { struct perf_event *event, *partial_group = NULL; struct pmu *pmu = group_event->pmu; - bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - /* Check if group transaction availabe */ - if (pmu->start_txn) - txn = true; - - if (txn) - pmu->start_txn(pmu); + pmu->start_txn(pmu); if (event_sched_in(group_event, cpuctx, ctx)) { - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -702,7 +695,7 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn || !pmu->commit_txn(pmu)) + if (!pmu->commit_txn(pmu)) return 0; group_error: @@ -717,8 +710,7 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -4965,6 +4957,31 @@ static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); static struct srcu_struct pmus_srcu; +static void perf_pmu_nop_void(struct pmu *pmu) +{ +} + +static int perf_pmu_nop_int(struct pmu *pmu) +{ + return 0; +} + +static void perf_pmu_start_txn(struct pmu *pmu) +{ + perf_pmu_disable(pmu); +} + +static int perf_pmu_commit_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); + return 0; +} + +static void perf_pmu_cancel_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); +} + int perf_pmu_register(struct pmu *pmu) { int ret; @@ -4974,6 +4991,29 @@ int perf_pmu_register(struct pmu *pmu) pmu->pmu_disable_count = alloc_percpu(int); if (!pmu->pmu_disable_count) goto unlock; + + if (!pmu->start_txn) { + if (pmu->pmu_enable) { + /* + * If we have pmu_enable/pmu_disable calls, install + * transaction stubs that use that to try and batch + * hardware accesses. + */ + pmu->start_txn = perf_pmu_start_txn; + pmu->commit_txn = perf_pmu_commit_txn; + pmu->cancel_txn = perf_pmu_cancel_txn; + } else { + pmu->start_txn = perf_pmu_nop_void; + pmu->commit_txn = perf_pmu_nop_int; + pmu->cancel_txn = perf_pmu_nop_void; + } + } + + if (!pmu->pmu_enable) { + pmu->pmu_enable = perf_pmu_nop_void; + pmu->pmu_disable = perf_pmu_nop_void; + } + list_add_rcu(&pmu->entry, &pmus); ret = 0; unlock: -- cgit v1.2.3 From fa407f35e0298d841e4088f95a7f9cf6e725c6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2010 12:35:12 +0200 Subject: perf: Shrink hw_perf_event Use hw_perf_event::period_left instead of hw_perf_event::remaining and win back 8 bytes. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - kernel/perf_event.c | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf85733597ec..8cafa15af60d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8ef4ba3bcb1f..1a6cdbf0d091 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4800,14 +4800,13 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { - u64 period; + s64 period = local64_read(&hwc->period_left); - if (hwc->remaining) { - if (hwc->remaining < 0) + if (period) { + if (period < 0) period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; + + local64_set(&hwc->period_left, 0); } else { period = max_t(u64, 10000, hwc->sample_period); } @@ -4823,7 +4822,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) if (hwc->sample_period) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); + local64_set(&hwc->period_left, ktime_to_ns(remaining)); hrtimer_cancel(&hwc->hrtimer); } -- cgit v1.2.3 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). 
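To make the new contract concrete before the per-architecture conversions below, here is its shape in one place. This skeleton is illustrative only -- the my_hw_*() helpers are invented for the example and no real driver is this simple:

static void my_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)	/* caller wants the period reprogrammed */
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;			/* running: neither STOPPED nor stale */
	my_hw_unhalt_counter(hwc);	/* invented helper */
}

static void my_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		my_hw_halt_counter(hwc);	/* invented helper */
		hwc->state |= PERF_HES_STOPPED;
	}
	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		my_hw_fold_count(event);	/* fold hw delta into event->count */
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int my_pmu_add(struct perf_event *event, int flags)
{
	/* Claim a counter; the event is scheduled but stopped... */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)	/* ...unless the core asks to start */
		my_pmu_start(event, PERF_EF_RELOAD);
	return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
	/* Removal implies a final stop plus count update. */
	my_pmu_stop(event, PERF_EF_UPDATE);
}

Throttling and IRQ-time freezing then reduce to ->stop()/->start() pairs without a full ->del()/->add(), which is where the efficiency claim above comes from.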
The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 71 +++++++++++---- arch/arm/kernel/perf_event.c | 96 ++++++++++++-------- arch/powerpc/kernel/perf_event.c | 105 ++++++++++++++-------- arch/powerpc/kernel/perf_event_fsl_emb.c | 107 ++++++++++++++--------- arch/sh/kernel/perf_event.c | 75 +++++++++++----- arch/sparc/kernel/perf_event.c | 109 ++++++++++++++--------- arch/x86/kernel/cpu/perf_event.c | 106 ++++++++++++---------- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- include/linux/ftrace_event.h | 4 +- include/linux/perf_event.h | 54 +++++++++--- kernel/hw_breakpoint.c | 29 ++++++- kernel/perf_event.c | 140 +++++++++++++++--------------- kernel/trace/trace_event_perf.c | 7 +- 14 files changed, 576 insertions(+), 331 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 3e260731f8e6..380ef02d557a 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,7 +419,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0; @@ -455,6 +454,10 @@ static int alpha_pmu_enable(struct perf_event *event) } } + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + local_irq_restore(flags); perf_pmu_enable(event->pmu); @@ -467,7 +470,7 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. 
*/ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -514,13 +517,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<<hwc->idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); } @@ -671,7 +705,7 @@ static int alpha_pmu_event_init(struct perf_event *event) /* * Main entry point - enable HW performance counters. */ -static void alpha_pmu_pmu_enable(struct pmu *pmu) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -697,7 +731,7 @@ static void alpha_pmu_pmu_enable(struct pmu *pmu) * Main entry point - disable HW performance counters. */ -static void alpha_pmu_pmu_disable(struct pmu *pmu) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -711,13 +745,14 @@ static void alpha_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = alpha_pmu_pmu_enable, - .pmu_disable = alpha_pmu_pmu_disable, + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 3343f3f4b973..448cfa6b3ef0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -221,46 +221,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! 
*/ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -269,8 +279,25 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -295,11 +322,9 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); @@ -534,7 +559,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static void armpmu_pmu_enable(struct pmu *pmu) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. 
*/ int idx; @@ -555,20 +580,21 @@ static void armpmu_pmu_enable(struct pmu *pmu) armpmu->start(); } -static void armpmu_pmu_disable(struct pmu *pmu) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } static struct pmu pmu = { - .pmu_enable = armpmu_pmu_enable, - .pmu_disable= armpmu_pmu_disable, - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, }; /* diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index deb84bbcb0e6..9cb4924b6c07 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void power_pmu_pmu_disable(struct pmu *pmu) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void power_pmu_pmu_enable(struct pmu *pmu) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -777,7 +785,7 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; @@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event) } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. 
*/ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (!event->hw.idx || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { - .pmu_enable = power_pmu_pmu_enable, - .pmu_disable = power_pmu_pmu_disable, + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, - .enable = power_pmu_enable, - .disable = power_pmu_disable, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, @@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 84b1974c628f..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count, } /* context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) } /* context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; @@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event) put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. 
- */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -524,13 +552,14 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { - .pmu_enable = fsl_emb_pmu_pmu_enable, - .pmu_disable = fsl_emb_pmu_pmu_disable, + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, .event_init = fsl_emb_pmu_event_init, - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, }; /* @@ -545,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -567,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -576,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 4bbe19058a58..cf39c4873468 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -206,26 +206,52 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } - barrier(); + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} - sh_perf_event_update(event, &event->hw, idx); +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} + +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -234,21 +260,20 @@ static int sh_pmu_enable(struct perf_event *event) perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); ret = 0; @@ -285,7 +310,7 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } -static void sh_pmu_pmu_enable(struct pmu *pmu) +static void sh_pmu_enable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -293,7 +318,7 @@ static void sh_pmu_pmu_enable(struct pmu *pmu) sh_pmu->enable_all(); } -static void sh_pmu_pmu_disable(struct pmu *pmu) +static void sh_pmu_disable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -302,11 +327,13 @@ static void sh_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sh_pmu_pmu_enable, - .pmu_disable = sh_pmu_pmu_disable, + .pmu_enable = sh_pmu_enable, + .pmu_disable = 
sh_pmu_disable, .event_init = sh_pmu_event_init, - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, .read = sh_pmu_read, }; @@ -334,15 +361,15 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 37cae676536c..516be2314b54 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -static void sparc_pmu_pmu_enable(struct pmu *pmu) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ static void sparc_pmu_pmu_enable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_pmu_disable(struct pmu *pmu) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,10 +713,53 @@ static void sparc_pmu_pmu_disable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; @@ -722,7 +768,10 @@ static void sparc_pmu_disable(struct perf_event *event) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. 
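+	 * (sparc_pmu_stop() with PERF_EF_UPDATE folds the remaining delta into event->count before the slot is recycled.)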
+ */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -752,19 +794,6 @@ static void sparc_pmu_disable(struct perf_event *event) local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -984,7 +1004,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; @@ -1001,6 +1021,10 @@ static int sparc_pmu_enable(struct perf_event *event) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed @@ -1156,13 +1180,14 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sparc_pmu_pmu_enable, - .pmu_disable = sparc_pmu_pmu_disable, + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, .event_init = sparc_pmu_event_init, - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, @@ -1243,7 +1268,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79705ac45019..dd6fec710677 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -static void x86_pmu_pmu_disable(struct pmu *pmu) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -800,10 +800,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event 
*event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -static void x86_pmu_pmu_enable(struct pmu *pmu) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -839,7 +839,14 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -851,7 +858,10 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -952,15 +962,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scheduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -975,10 +982,14 @@ static int x86_pmu_enable(struct perf_event *event) if (ret < 0) goto out; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1003,27 +1014,28 @@ out: return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1080,27 +1092,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - /* - * Drain the remaining delta count out of an event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + 
cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of an event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1113,7 +1127,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1165,7 +1179,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1605,15 +1619,17 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { - .pmu_enable = x86_pmu_pmu_enable, - .pmu_disable = x86_pmu_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + .event_init = x86_pmu_event_init, - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, + + .add = x86_pmu_add, + .del = x86_pmu_del, .start = x86_pmu_start, .stop = x86_pmu_stop, .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d2..82395f2378ec 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311cd..9893a2f77b7a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -491,7 +491,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, &regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5f8ad7bec636..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8cafa15af60d..402073c61669 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -538,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 
sample_period; u64 last_period; @@ -549,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -564,42 +572,62 @@ struct pmu { int *pmu_disable_count; + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* + * Try and initialize the event for this PMU. * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group - */ - - /* - * Start the transaction, after this ->enable() doesn't need to + * + * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* optional */ /* - * If ->start_txn() disabled the ->enable() schedulability test + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called - * for each successful ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successful ->add() during the transaction. 
*/ void (*cancel_txn) (struct pmu *pmu); /* optional */ }; @@ -680,7 +708,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e9c5cfa1fd20..6f150095cafe 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -586,10 +586,35 @@ static int hw_breakpoint_event_init(struct perf_event *bp) return 0; } +static int hw_breakpoint_add(struct perf_event *bp, int flags) +{ + if (!(flags & PERF_EF_START)) + bp->hw.state = PERF_HES_STOPPED; + + return arch_install_hw_breakpoint(bp); +} + +static void hw_breakpoint_del(struct perf_event *bp, int flags) +{ + arch_uninstall_hw_breakpoint(bp); +} + +static void hw_breakpoint_start(struct perf_event *bp, int flags) +{ + bp->hw.state = 0; +} + +static void hw_breakpoint_stop(struct perf_event *bp, int flags) +{ + bp->hw.state = PERF_HES_STOPPED; +} + static struct pmu perf_breakpoint = { .event_init = hw_breakpoint_event_init, - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, + .add = hw_breakpoint_add, + .del = hw_breakpoint_del, + .start = hw_breakpoint_start, + .stop = hw_breakpoint_stop, .read = hw_breakpoint_pmu_read, }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1a6cdbf0d091..3bace4fd0355 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -424,7 +424,7 @@ event_sched_out(struct perf_event *event, event->state = PERF_EVENT_STATE_OFF; } event->tstamp_stopped = ctx->time; - event->pmu->disable(event); + event->pmu->del(event, 0); event->oncpu = -1; if (!is_software_event(event)) @@ -649,7 +649,7 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); - if (event->pmu->enable(event)) { + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; return -EAGAIN; @@ -1482,22 +1482,6 @@ do { \ return div64_u64(dividend, divisor); } -static void perf_event_stop(struct perf_event *event) -{ - if (!event->pmu->stop) - return event->pmu->disable(event); - - return event->pmu->stop(event); -} - -static int perf_event_start(struct perf_event *event) -{ - if (!event->pmu->start) - return event->pmu->enable(event); - - return event->pmu->start(event); -} - static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -1517,9 +1501,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_event_stop(event); + event->pmu->stop(event, PERF_EF_UPDATE); local64_set(&hwc->period_left, 0); - perf_event_start(event); + event->pmu->start(event, PERF_EF_RELOAD); } } @@ -1548,7 +1532,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - event->pmu->unthrottle(event); + event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) @@ -2506,6 +2490,9 @@ int perf_event_task_disable(void) static int perf_event_index(struct perf_event *event) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; @@ -4120,8 +4107,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, struct hw_perf_event *hwc = &event->hw; int ret = 0; - throttle = (throttle && event->pmu->unthrottle != NULL); - if (!throttle) { 
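		/* not throttled: just account the interrupt; now that ->unthrottle is gone, a throttled event is later restarted via ->start(event, 0) */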
hwc->interrupts++; } else { @@ -4246,7 +4231,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } } -static void perf_swevent_add(struct perf_event *event, u64 nr, +static void perf_swevent_event(struct perf_event *event, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4272,6 +4257,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (regs) { if (event->attr.exclude_user && user_mode(regs)) return 1; @@ -4371,7 +4359,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_add(event, nr, nmi, data, regs); + perf_swevent_event(event, nr, nmi, data, regs); } end: rcu_read_unlock(); @@ -4415,7 +4403,7 @@ static void perf_swevent_read(struct perf_event *event) { } -static int perf_swevent_enable(struct perf_event *event) +static int perf_swevent_add(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct perf_cpu_context *cpuctx; @@ -4428,6 +4416,8 @@ static int perf_swevent_enable(struct perf_event *event) perf_swevent_set_period(event); } + hwc->state = !(flags & PERF_EF_START); + head = find_swevent_head(cpuctx, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4437,18 +4427,19 @@ static int perf_swevent_enable(struct perf_event *event) return 0; } -static void perf_swevent_disable(struct perf_event *event) +static void perf_swevent_del(struct perf_event *event, int flags) { hlist_del_rcu(&event->hlist_entry); } -static void perf_swevent_void(struct perf_event *event) +static void perf_swevent_start(struct perf_event *event, int flags) { + event->hw.state = 0; } -static int perf_swevent_int(struct perf_event *event) +static void perf_swevent_stop(struct perf_event *event, int flags) { - return 0; + event->hw.state = PERF_HES_STOPPED; } /* Deref the hlist from the update side */ @@ -4604,12 +4595,11 @@ static int perf_swevent_init(struct perf_event *event) static struct pmu perf_swevent = { .event_init = perf_swevent_init, - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_swevent_add, + .del = perf_swevent_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; #ifdef CONFIG_EVENT_TRACING @@ -4657,7 +4647,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_add(event, count, 1, &data, regs); + perf_swevent_event(event, count, 1, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -4696,12 +4686,11 @@ static int perf_tp_event_init(struct perf_event *event) static struct pmu perf_tracepoint = { .event_init = perf_tp_event_init, - .enable = perf_trace_enable, - .disable = perf_trace_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_trace_add, + .del = perf_trace_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, }; static inline void perf_tp_register(void) @@ -4757,8 +4746,8 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_sample_data_init(&sample, 
bp->attr.bp_addr); - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (!bp->hw.state && !perf_exclude_event(bp, regs)) + perf_swevent_event(bp, 1, 1, &sample, regs); } #endif @@ -4834,32 +4823,39 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) static void cpu_clock_event_update(struct perf_event *event) { - int cpu = raw_smp_processor_id(); s64 prev; u64 now; - now = cpu_clock(cpu); + now = local_clock(); prev = local64_xchg(&event->hw.prev_count, now); local64_add(now - prev, &event->count); } -static int cpu_clock_event_enable(struct perf_event *event) +static void cpu_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); + local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); - - return 0; } -static void cpu_clock_event_disable(struct perf_event *event) +static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); cpu_clock_event_update(event); } +static int cpu_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + cpu_clock_event_start(event, flags); + + return 0; +} + +static void cpu_clock_event_del(struct perf_event *event, int flags) +{ + cpu_clock_event_stop(event, flags); +} + static void cpu_clock_event_read(struct perf_event *event) { cpu_clock_event_update(event); @@ -4878,8 +4874,10 @@ static int cpu_clock_event_init(struct perf_event *event) static struct pmu perf_cpu_clock = { .event_init = cpu_clock_event_init, - .enable = cpu_clock_event_enable, - .disable = cpu_clock_event_disable, + .add = cpu_clock_event_add, + .del = cpu_clock_event_del, + .start = cpu_clock_event_start, + .stop = cpu_clock_event_stop, .read = cpu_clock_event_read, }; @@ -4897,25 +4895,29 @@ static void task_clock_event_update(struct perf_event *event, u64 now) local64_add(delta, &event->count); } -static int task_clock_event_enable(struct perf_event *event) +static void task_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - + local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); - - return 0; } -static void task_clock_event_disable(struct perf_event *event) +static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); task_clock_event_update(event, event->ctx->time); +} + +static int task_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + task_clock_event_start(event, flags); + return 0; +} + +static void task_clock_event_del(struct perf_event *event, int flags) +{ + task_clock_event_stop(event, PERF_EF_UPDATE); } static void task_clock_event_read(struct perf_event *event) @@ -4947,8 +4949,10 @@ static int task_clock_event_init(struct perf_event *event) static struct pmu perf_task_clock = { .event_init = task_clock_event_init, - .enable = task_clock_event_enable, - .disable = task_clock_event_disable, + .add = task_clock_event_add, + .del = task_clock_event_del, + .start = task_clock_event_start, + .stop = task_clock_event_stop, .read = task_clock_event_read, }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index f3bbcd1c90c8..39c059ca670e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ 
-101,7 +101,7 @@ int perf_trace_init(struct perf_event *p_event) return ret; } -int perf_trace_enable(struct perf_event *p_event) +int perf_trace_add(struct perf_event *p_event, int flags) { struct ftrace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; @@ -111,13 +111,16 @@ int perf_trace_enable(struct perf_event *p_event) if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; + if (!(flags & PERF_EF_START)) + p_event->hw.state = PERF_HES_STOPPED; + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; } -void perf_trace_disable(struct perf_event *p_event) +void perf_trace_del(struct perf_event *p_event, int flags) { hlist_del_rcu(&p_event->hlist_entry); } -- cgit v1.2.3 From 15ac9a395a753cb28c674e7ea80386ffdff21785 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 15:51:45 +0200 Subject: perf: Remove the sysfs bits Neither the overcommit nor the reservation sysfs parameter was actually working; remove them as they'll only get in the way. Signed-off-by: Peter Zijlstra Cc: paulus LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 3 +- arch/arm/kernel/perf_event.c | 9 +-- arch/sparc/kernel/perf_event.c | 9 +-- arch/x86/kernel/cpu/perf_event.c | 1 - include/linux/perf_event.h | 6 -- kernel/perf_event.c | 124 --------------------------------------- 6 files changed, 5 insertions(+), 147 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 380ef02d557a..9bb8c024080c 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -808,7 +808,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! 
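 * la_ptr is the index of the overflowed counter, so anything at or above alpha_pmu->num_pmcs is bogus.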
*/ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -879,7 +879,6 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 448cfa6b3ef0..45d6a35217c1 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -534,7 +534,7 @@ static int armpmu_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu->num_events) { atomic_dec(&active_events); return -ENOSPC; } @@ -2974,14 +2974,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2993,7 +2991,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -3005,7 +3002,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. */ @@ -3016,13 +3012,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -3032,7 +3026,6 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } perf_pmu_register(&pmu); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 516be2314b54..f9a706759364 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -897,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -1014,7 +1014,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; @@ -1097,7 +1097,7 @@ static int sparc_pmu_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1309,9 +1309,6 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. 
*/ - perf_max_events = 2; - perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dd6fec710677..0fb17050360f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1396,7 +1396,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 402073c61669..b22176d3ebdf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -860,7 +860,6 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; @@ -883,11 +882,6 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3bace4fd0355..8462e69409ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -39,10 +39,6 @@ */ static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); -int perf_max_events __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -66,11 +62,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; static atomic64_t perf_event_id; -/* - * Lock for (sysadmin-configurable) event reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - void __weak perf_event_print_debug(void) { } void perf_pmu_disable(struct pmu *pmu) @@ -480,16 +471,6 @@ static void __perf_event_remove_from_context(void *info) list_del_event(event, ctx); - if (!ctx->task) { - /* - * Allow more per task events with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_events - ctx->nr_events, - perf_max_events - perf_reserved_percpu); - } - raw_spin_unlock(&ctx->lock); } @@ -823,9 +804,6 @@ static void __perf_install_in_context(void *info) } } - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - unlock: raw_spin_unlock(&ctx->lock); } @@ -5930,10 +5908,6 @@ static void __cpuinit perf_event_init_cpu(int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - mutex_lock(&cpuctx->hlist_mutex); if (cpuctx->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -6008,101 +5982,3 @@ void __init perf_event_init(void) perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); } - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return 
err; - if (val > perf_max_events) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - raw_spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_events - cpuctx->ctx.nr_events, - perf_max_events - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - raw_spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_events", -}; - -static int __init perf_event_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_event_sysfs_init); -- cgit v1.2.3 From b28ab83c595e767f2028276b7398d17f2253cec0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:48:15 +0200 Subject: perf: Remove the swevent hash-table from the cpu context Separate the swevent hash-table from the cpu_context bits in preparation for per pmu cpu contexts. This keeps the swevent hash a global entity. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 --- kernel/perf_event.c | 104 +++++++++++++++++++++++++-------------------- 2 files changed, 58 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b22176d3ebdf..4ab4f0ca09a1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,12 +861,6 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* Recursion avoidance in each context */ - int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a3c86a8335c4..2c47ed6c4f26 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4154,6 +4154,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, * Generic software event infrastructure */ +struct swevent_htable { + struct swevent_hlist *swevent_hlist; + struct mutex hlist_mutex; + int hlist_refcount; + + /* Recursion avoidance in each context */ + int recursion[PERF_NR_CONTEXTS]; +}; + +static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); + /* * We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. 
This period event @@ -4286,11 +4297,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) /* For the read side: events when they trigger */ static inline struct hlist_head * -find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) +find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) { struct swevent_hlist *hlist; - hlist = rcu_dereference(ctx->swevent_hlist); + hlist = rcu_dereference(swhash->swevent_hlist); if (!hlist) return NULL; @@ -4299,7 +4310,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) /* For the event head insertion and removal in the hlist */ static inline struct hlist_head * -find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) +find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) { struct swevent_hlist *hlist; u32 event_id = event->attr.config; @@ -4310,7 +4321,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) * and release. Which makes the protected version suitable here. * The context lock guarantees that. */ - hlist = rcu_dereference_protected(ctx->swevent_hlist, + hlist = rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&event->ctx->lock)); if (!hlist) return NULL; @@ -4323,17 +4334,13 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; struct hlist_node *node; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - rcu_read_lock(); - - head = find_swevent_head_rcu(cpuctx, type, event_id); - + head = find_swevent_head_rcu(swhash, type, event_id); if (!head) goto end; @@ -4347,17 +4354,17 @@ end: int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - return get_recursion_context(cpuctx->recursion); + return get_recursion_context(swhash->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - put_recursion_context(cpuctx->recursion, rctx); + put_recursion_context(swhash->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4385,12 +4392,10 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct hw_perf_event *hwc = &event->hw; - struct perf_cpu_context *cpuctx; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - if (hwc->sample_period) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); @@ -4398,7 +4403,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); - head = find_swevent_head(cpuctx, event); + head = find_swevent_head(swhash, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4424,10 +4429,10 @@ static void perf_swevent_stop(struct perf_event *event, int flags) /* Deref the hlist from the update side */ static inline struct swevent_hlist * -swevent_hlist_deref(struct perf_cpu_context *cpuctx) +swevent_hlist_deref(struct swevent_htable *swhash) { - return 
rcu_dereference_protected(cpuctx->swevent_hlist, - lockdep_is_held(&cpuctx->hlist_mutex)); + return rcu_dereference_protected(swhash->swevent_hlist, + lockdep_is_held(&swhash->hlist_mutex)); } static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) @@ -4438,27 +4443,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) kfree(hlist); } -static void swevent_hlist_release(struct perf_cpu_context *cpuctx) +static void swevent_hlist_release(struct swevent_htable *swhash) { - struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); + struct swevent_hlist *hlist = swevent_hlist_deref(swhash); if (!hlist) return; - rcu_assign_pointer(cpuctx->swevent_hlist, NULL); + rcu_assign_pointer(swhash->swevent_hlist, NULL); call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); } static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!--cpuctx->hlist_refcount) - swevent_hlist_release(cpuctx); + if (!--swhash->hlist_refcount) + swevent_hlist_release(swhash); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } static void swevent_hlist_put(struct perf_event *event) @@ -4476,12 +4481,12 @@ static void swevent_hlist_put(struct perf_event *event) static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); int err = 0; - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { + if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); @@ -4489,11 +4494,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) err = -ENOMEM; goto exit; } - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - cpuctx->hlist_refcount++; + swhash->hlist_refcount++; exit: - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); return err; } @@ -5889,12 +5894,15 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - int cpu; struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; + int cpu; for_each_possible_cpu(cpu) { + swhash = &per_cpu(swevent_htable, cpu); + mutex_init(&swhash->hlist_mutex); + cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_init(&cpuctx->hlist_mutex); __perf_event_init_context(&cpuctx->ctx, NULL); } } @@ -5902,18 +5910,21 @@ static void __init perf_event_init_all_cpus(void) static void __cpuinit perf_event_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_lock(&cpuctx->hlist_mutex); - if (cpuctx->hlist_refcount > 0) { + swhash = &per_cpu(swevent_htable, cpu); + + mutex_lock(&swhash->hlist_mutex); + if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; - hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); - WARN_ON_ONCE(!hlist); - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); + WARN_ON(!hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } #ifdef CONFIG_HOTPLUG_CPU 
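The conversion above is an instance of a generic pattern: a per-cpu structure that owns an RCU-managed hash list, with readers relying only on rcu_read_lock() and writers serialized by a mutex and a refcount. The sketch below is not part of the patch; all demo_* names are hypothetical stand-ins that mirror the swevent_hlist_get_cpu()/swevent_hlist_put_cpu() discipline shown in the diff:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#define DEMO_HASH_SIZE 256

struct demo_hlist {
	struct hlist_head	heads[DEMO_HASH_SIZE];
	struct rcu_head		rcu_head;
};

struct demo_htable {
	struct demo_hlist	*hlist;		/* RCU-managed; readers use rcu_dereference() */
	struct mutex		lock;		/* serializes writers; mutex_init() per cpu at boot */
	int			refcount;
};

static DEFINE_PER_CPU(struct demo_htable, demo_htable);

static int demo_hlist_get_cpu(int cpu)
{
	struct demo_htable *ht = &per_cpu(demo_htable, cpu);
	int err = 0;

	mutex_lock(&ht->lock);
	if (!ht->hlist) {
		struct demo_hlist *hlist;

		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
		if (!hlist) {
			err = -ENOMEM;
			goto unlock;
		}
		rcu_assign_pointer(ht->hlist, hlist);	/* publish to readers */
	}
	ht->refcount++;
unlock:
	mutex_unlock(&ht->lock);
	return err;
}

static void demo_hlist_release_rcu(struct rcu_head *rcu_head)
{
	/* runs after a grace period, once all readers are gone */
	kfree(container_of(rcu_head, struct demo_hlist, rcu_head));
}

static void demo_hlist_put_cpu(int cpu)
{
	struct demo_htable *ht = &per_cpu(demo_htable, cpu);

	mutex_lock(&ht->lock);
	if (!--ht->refcount) {
		struct demo_hlist *hlist = ht->hlist;

		rcu_assign_pointer(ht->hlist, NULL);	/* unpublish */
		call_rcu(&hlist->rcu_head, demo_hlist_release_rcu);
	}
	mutex_unlock(&ht->lock);
}

Keeping the whole table behind one RCU pointer is what lets the read side (the software-event fast path) stay lock-free while teardown waits only for a grace period.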
@@ -5931,11 +5942,12 @@ static void __perf_event_exit_cpu(void *info) static void perf_event_exit_cpu(int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); struct perf_event_context *ctx = &cpuctx->ctx; - mutex_lock(&cpuctx->hlist_mutex); - swevent_hlist_release(cpuctx); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); + swevent_hlist_release(swhash); + mutex_unlock(&swhash->hlist_mutex); mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); -- cgit v1.2.3 From b5ab4cd563e7ab49b27957704112a8ecade54e1f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 16:32:21 +0200 Subject: perf: Per cpu-context rotation timer Give each cpu-context its own timer so that it is a self-contained entity; this eases the way for per-pmu-per-cpu contexts and provides the basic infrastructure to allow different rotation times per pmu. Things to look at: - folding the tick and these TICK_NSEC timers - separate task context rotation Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++- kernel/perf_event.c | 80 ++++++++++++++++++++++++++++++++++++---------- kernel/sched.c | 2 -- 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4ab4f0ca09a1..fa04537df55b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,6 +861,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + u64 timer_interval; + struct hrtimer timer; }; struct perf_output_handle { @@ -881,7 +883,6 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1067,8 +1068,6 @@ perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2c47ed6c4f26..d75e4c8727f9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -78,6 +78,25 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } +static void perf_pmu_rotate_start(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + if (hrtimer_active(&cpuctx->timer)) + return; + + __hrtimer_start_range_ns(&cpuctx->timer, + ns_to_ktime(cpuctx->timer_interval), 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void perf_pmu_rotate_stop(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + hrtimer_cancel(&cpuctx->timer); +} + static void get_ctx(struct perf_event_context *ctx) { WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); @@ 
-281,6 +300,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } list_add_rcu(&event->event_entry, &ctx->event_list); + if (!ctx->nr_events) + perf_pmu_rotate_start(); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -1383,6 +1404,12 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; + + /* + * Since these rotations are per-cpu, we need to ensure the + * cpu-context we got scheduled on is actually rotating. + */ + perf_pmu_rotate_start(); } #define MAX_INTERRUPTS (~0ULL) @@ -1487,7 +1514,7 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) { struct perf_event *event; struct hw_perf_event *hwc; @@ -1524,7 +1551,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) hwc->freq_count_stamp = now; if (delta > 0) - perf_adjust_period(event, TICK_NSEC, delta); + perf_adjust_period(event, period, delta); } raw_spin_unlock(&ctx->lock); } @@ -1542,30 +1569,39 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr) +/* + * Cannot race with ->pmu_rotate_start() because this is run from hardirq + * context, and ->pmu_rotate_start() is called with irqs disabled (both are + * cpu affine, so there are no SMP races). + */ +static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { + enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; int rotate = 0; - if (!atomic_read(&nr_events)) - return; + cpuctx = container_of(timer, struct perf_cpu_context, timer); - cpuctx = &__get_cpu_var(perf_cpu_context); - if (cpuctx->ctx.nr_events && - cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - rotate = 1; + if (cpuctx->ctx.nr_events) { + restart = HRTIMER_RESTART; + if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) + rotate = 1; + } - ctx = curr->perf_event_ctxp; - if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) - rotate = 1; + ctx = current->perf_event_ctxp; + if (ctx && ctx->nr_events) { + restart = HRTIMER_RESTART; + if (ctx->nr_events != ctx->nr_active) + rotate = 1; + } - perf_ctx_adjust_freq(&cpuctx->ctx); + perf_ctx_adjust_freq(&cpuctx->ctx, cpuctx->timer_interval); if (ctx) - perf_ctx_adjust_freq(ctx); + perf_ctx_adjust_freq(ctx, cpuctx->timer_interval); if (!rotate) - return; + goto done; cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) @@ -1577,7 +1613,12 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(curr, EVENT_FLEXIBLE); + task_ctx_sched_in(current, EVENT_FLEXIBLE); + +done: + hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); + + return restart; } static int event_enable_on_exec(struct perf_event *event, @@ -4786,7 +4827,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) } __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); + HRTIMER_MODE_REL_PINNED, 0); } } @@ -5904,6 +5945,9 @@ static void __init perf_event_init_all_cpus(void) cpuctx = &per_cpu(perf_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; } 
} @@ -5934,6 +5978,8 @@ static void __perf_event_exit_cpu(void *info) struct perf_event_context *ctx = &cpuctx->ctx; struct perf_event *event, *tmp; + perf_pmu_rotate_stop(); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) diff --git a/kernel/sched.c b/kernel/sched.c index 09b574e7f4df..66a02ba83c01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3578,8 +3578,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr); - #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); -- cgit v1.2.3 From 108b02cfce04ee90b0a07ee0b104baffd39f5934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:32:03 +0200 Subject: perf: Per-pmu-per-cpu contexts Allocate per-cpu contexts per pmu. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +- kernel/perf_event.c | 178 ++++++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa04537df55b..22155ef3b362 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -570,7 +570,8 @@ struct perf_event; struct pmu { struct list_head entry; - int *pmu_disable_count; + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; /* * Fully disable/enable this PMU, can be used to protect from the PMI @@ -808,6 +809,7 @@ struct perf_event { * Used as a container for task events and CPU events as well: */ struct perf_event_context { + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d75e4c8727f9..8ca6e690ffe3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -34,16 +34,15 @@ #include -/* - * Each CPU has a list of per CPU events: - */ -static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + /* * perf event paranoia level: * -1 - not paranoid at all @@ -78,9 +77,9 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } -static void perf_pmu_rotate_start(void) +static void perf_pmu_rotate_start(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); if (hrtimer_active(&cpuctx->timer)) return; @@ -90,9 +89,9 @@ static void perf_pmu_rotate_start(void) HRTIMER_MODE_REL_PINNED, 0); } -static void perf_pmu_rotate_stop(void) +static void perf_pmu_rotate_stop(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); hrtimer_cancel(&cpuctx->timer); } @@ -301,7 +300,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); 
ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + /* * Cross CPU call to remove a performance event * @@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event, */ static void __perf_event_remove_from_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -556,8 +561,8 @@ retry: static void __perf_event_disable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a per-task event, need to check whether this @@ -765,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event, */ static void __perf_install_in_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -912,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, static void __perf_event_enable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -1188,15 +1193,19 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; + struct perf_cpu_context *cpuctx; int do_switch = 1; perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx || !cpuctx->task_ctx)) + if (likely(!ctx)) + return; + + cpuctx = __get_cpu_context(ctx); + if (!cpuctx->task_ctx) return; rcu_read_lock(); @@ -1242,7 +1251,7 @@ void perf_event_task_sched_out(struct task_struct *task, static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (!cpuctx->task_ctx) return; @@ -1360,8 +1369,8 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, static void task_ctx_sched_in(struct task_struct *task, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (likely(!ctx)) return; @@ -1383,12 +1392,13 @@ static void task_ctx_sched_in(struct task_struct *task, */ void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct 
perf_cpu_context *cpuctx; if (likely(!ctx)) return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1409,7 +1419,7 @@ void perf_event_task_sched_in(struct task_struct *task) * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); } #define MAX_INTERRUPTS (~0ULL) @@ -1687,9 +1697,9 @@ out: */ static void __perf_event_read(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -1962,7 +1972,8 @@ __perf_event_init_context(struct perf_event_context *ctx, ctx->task = task; } -static struct perf_event_context *find_get_context(pid_t pid, int cpu) +static struct perf_event_context * +find_get_context(struct pmu *pmu, pid_t pid, int cpu) { struct perf_event_context *ctx; struct perf_cpu_context *cpuctx; @@ -1986,7 +1997,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) if (!cpu_online(cpu)) return ERR_PTR(-ENODEV); - cpuctx = &per_cpu(perf_cpu_context, cpu); + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; get_ctx(ctx); @@ -2030,6 +2041,7 @@ retry: if (!ctx) goto errout; __perf_event_init_context(ctx, task); + ctx->pmu = pmu; get_ctx(ctx); if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { /* @@ -3745,18 +3757,20 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; + struct perf_cpu_context *cpuctx; + struct pmu *pmu; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_task_ctx(&cpuctx->ctx, task_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_task_ctx(&cpuctx->ctx, task_event); + } if (!ctx) ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_task_ctx(ctx, task_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } static void perf_event_task(struct task_struct *task, @@ -3861,6 +3875,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; unsigned int size; + struct pmu *pmu; char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); @@ -3872,14 +3887,15 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_comm_ctx(&cpuctx->ctx, comm_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_comm_ctx(ctx, comm_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) @@ -3989,6 +4005,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char tmp[16]; char *buf = NULL; const char *name; + struct pmu *pmu; memset(tmp, 0, sizeof(tmp)); @@ -4040,14 +4057,16 @@ got_name: mmap_event->event_id.header.size = 
sizeof(mmap_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); kfree(buf); } @@ -4982,10 +5001,6 @@ static struct pmu perf_task_clock = { .read = task_clock_event_read, }; -static LIST_HEAD(pmus); -static DEFINE_MUTEX(pmus_lock); -static struct srcu_struct pmus_srcu; - static void perf_pmu_nop_void(struct pmu *pmu) { } @@ -5013,7 +5028,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) int perf_pmu_register(struct pmu *pmu) { - int ret; + int cpu, ret; mutex_lock(&pmus_lock); ret = -ENOMEM; @@ -5021,6 +5036,21 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); + if (!pmu->pmu_cpu_context) + goto free_pdc; + + for_each_possible_cpu(cpu) { + struct perf_cpu_context *cpuctx; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->ctx.pmu = pmu; + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; + } + if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5049,6 +5079,10 @@ unlock: mutex_unlock(&pmus_lock); return ret; + +free_pdc: + free_percpu(pmu->pmu_disable_count); + goto unlock; } void perf_pmu_unregister(struct pmu *pmu) @@ -5057,9 +5091,14 @@ void perf_pmu_unregister(struct pmu *pmu) list_del_rcu(&pmu->entry); mutex_unlock(&pmus_lock); + /* + * We use the pmu list either under SRCU or preempt_disable, + * synchronize_srcu() implies synchronize_sched() so we're good. 
+ */ synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); + free_percpu(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5374,7 +5413,7 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; @@ -5489,7 +5528,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err; } - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_free; @@ -5833,6 +5872,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, return -ENOMEM; __perf_event_init_context(child_ctx, child); + child_ctx->pmu = event->pmu; child->perf_event_ctxp = child_ctx; get_task_struct(child); } @@ -5935,30 +5975,18 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - struct perf_cpu_context *cpuctx; struct swevent_htable *swhash; int cpu; for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_event_init_context(&cpuctx->ctx, NULL); - cpuctx->timer_interval = TICK_NSEC; - hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cpuctx->timer.function = perf_event_context_tick; } } static void __cpuinit perf_event_init_cpu(int cpu) { - struct perf_cpu_context *cpuctx; - struct swevent_htable *swhash; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - - swhash = &per_cpu(swevent_htable, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); if (swhash->hlist_refcount > 0) { @@ -5972,32 +6000,46 @@ static void __cpuinit perf_event_init_cpu(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_event_exit_cpu(void *info) +static void __perf_event_exit_context(void *__info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event_context *ctx = __info; struct perf_event *event, *tmp; - perf_pmu_rotate_stop(); + perf_pmu_rotate_stop(ctx->pmu); list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } + +static void perf_event_exit_cpu_context(int cpu) +{ + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + ctx = &this_cpu_ptr(pmu->pmu_cpu_context)->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); + mutex_unlock(&ctx->mutex); + } + srcu_read_unlock(&pmus_srcu, idx); + +} + static void perf_event_exit_cpu(int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - struct perf_event_context *ctx = &cpuctx->ctx; mutex_lock(&swhash->hlist_mutex); swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); + perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } -- cgit v1.2.3 From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 
From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the intel uncore unit) they won't actually need a task context. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + include/linux/sched.h | 8 +- kernel/perf_event.c | 336 +++++++++++++++++++++++++++++++-------------- 3 files changed, 239 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 22155ef3b362..9ecfd856ce6e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -572,6 +572,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* * Fully disable/enable this PMU, can be used to protect from the PMI diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..89d6023c6f82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 13d98d756347..7223ea875861 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -148,13 +148,13 @@ static u64 primary_event_id(struct perf_event *event) * the context could get moved to another task. */ static struct perf_event_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) +perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; rcu_read_lock(); retry: - ctx = rcu_dereference(task->perf_event_ctxp); + ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* * If this context is a clone of another, it might @@ -167,7 +167,7 @@ retry: * can't get swapped on us any more. */ raw_spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_event_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -186,12 +186,13 @@ retry: * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. 
*/ -static struct perf_event_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context * +perf_pin_task_context(struct task_struct *task, int ctxn) { struct perf_event_context *ctx; unsigned long flags; - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -1179,28 +1180,15 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, } } -/* - * Called from scheduler to remove the events of the current task, - * with interrupts disabled. - * - * We stop each event and update the event value in event->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * not restart the event. - */ -void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; struct perf_event_context *parent; struct perf_cpu_context *cpuctx; int do_switch = 1; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx)) return; @@ -1210,7 +1198,7 @@ void perf_event_task_sched_out(struct task_struct *task, rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_event_ctxp; + next_ctx = next->perf_event_ctxp[ctxn]; if (parent && next_ctx && rcu_dereference(next_ctx->parent_ctx) == parent) { /* @@ -1229,8 +1217,8 @@ void perf_event_task_sched_out(struct task_struct *task, * XXX do we need a memory barrier of sorts * wrt to rcu_dereference() of perf_event_ctxp */ - task->perf_event_ctxp = next_ctx; - next->perf_event_ctxp = ctx; + task->perf_event_ctxp[ctxn] = next_ctx; + next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; do_switch = 0; @@ -1248,6 +1236,31 @@ void perf_event_task_sched_out(struct task_struct *task, } } +#define for_each_task_context_nr(ctxn) \ + for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) + +/* + * Called from scheduler to remove the events of the current task, + * with interrupts disabled. + * + * We stop each event and update the event value in event->count. + * + * This does not protect us against NMI, but disable() + * sets the disabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * not restart the event. 
+ */ +void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) +{ + int ctxn; + + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + for_each_task_context_nr(ctxn) + perf_event_context_sched_out(task, ctxn, next); +} + static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { @@ -1366,38 +1379,23 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type); } -static void task_ctx_sched_in(struct task_struct *task, +static void task_ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_event_context *ctx = task->perf_event_ctxp; - struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; + ctx_sched_in(ctx, cpuctx, event_type); cpuctx->task_ctx = ctx; } -/* - * Called from scheduler to add the events of the current task - * with interrupts disabled. - * - * We restore the event value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * keep the event running. - */ -void perf_event_task_sched_in(struct task_struct *task) + +void perf_event_context_sched_in(struct perf_event_context *ctx) { - struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; - cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1422,6 +1420,31 @@ void perf_event_task_sched_in(struct task_struct *task) perf_pmu_rotate_start(ctx->pmu); } +/* + * Called from scheduler to add the events of the current task + * with interrupts disabled. + * + * We restore the event value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * keep the event running. 
+ */ +void perf_event_task_sched_in(struct task_struct *task) +{ + struct perf_event_context *ctx; + int ctxn; + + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (likely(!ctx)) + continue; + + perf_event_context_sched_in(ctx); + } +} + #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -1588,7 +1611,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; - struct perf_event_context *ctx; + struct perf_event_context *ctx = NULL; int rotate = 0; cpuctx = container_of(timer, struct perf_cpu_context, timer); @@ -1599,7 +1622,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) rotate = 1; } - ctx = current->perf_event_ctxp; + ctx = cpuctx->task_ctx; if (ctx && ctx->nr_events) { restart = HRTIMER_RESTART; if (ctx->nr_events != ctx->nr_active) @@ -1623,7 +1646,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(current, EVENT_FLEXIBLE); + task_ctx_sched_in(ctx, EVENT_FLEXIBLE); done: hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); @@ -1650,20 +1673,18 @@ static int event_enable_on_exec(struct perf_event *event, * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ -static void perf_event_enable_on_exec(struct task_struct *task) +static void perf_event_enable_on_exec(struct perf_event_context *ctx) { - struct perf_event_context *ctx; struct perf_event *event; unsigned long flags; int enabled = 0; int ret; local_irq_save(flags); - ctx = task->perf_event_ctxp; if (!ctx || !ctx->nr_events) goto out; - __perf_event_task_sched_out(ctx); + task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); @@ -1687,7 +1708,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task); + perf_event_context_sched_in(ctx); out: local_irq_restore(flags); } @@ -1995,7 +2016,7 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) struct perf_cpu_context *cpuctx; struct task_struct *task; unsigned long flags; - int err; + int ctxn, err; if (pid == -1 && cpu != -1) { /* Must be root to operate on a CPU event: */ @@ -2044,8 +2065,13 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; + err = -EINVAL; + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto errout; + retry: - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { unclone_ctx(ctx); raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -2059,7 +2085,7 @@ retry: get_ctx(ctx); - if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { + if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { /* * We raced with some other task; use * the context they set. 
@@ -3773,19 +3799,26 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_event_context *ctx = task_event->task_ctx; struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; struct pmu *pmu; + int ctxn; rcu_read_lock_sched(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); + + ctx = task_event->task_ctx; + if (!ctx) { + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + } + if (ctx) + perf_event_task_ctx(ctx, task_event); } - if (!ctx) - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_task_ctx(ctx, task_event); rcu_read_unlock_sched(); } @@ -3890,9 +3923,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; + char comm[TASK_COMM_LEN]; unsigned int size; struct pmu *pmu; - char comm[TASK_COMM_LEN]; + int ctxn; memset(comm, 0, sizeof(comm)); strlcpy(comm, comm_event->task->comm, sizeof(comm)); @@ -3907,19 +3941,31 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_comm_ctx(ctx, comm_event); } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_comm_ctx(ctx, comm_event); rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; + struct perf_event_context *ctx; + int ctxn; - if (task->perf_event_ctxp) - perf_event_enable_on_exec(task); + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } if (!atomic_read(&nr_comm_events)) return; @@ -4022,6 +4068,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; const char *name; struct pmu *pmu; + int ctxn; memset(tmp, 0, sizeof(tmp)); @@ -4078,10 +4125,17 @@ got_name: cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) { + perf_event_mmap_ctx(ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); rcu_read_unlock_sched(); kfree(buf); @@ -5042,6 +5096,43 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) perf_pmu_enable(pmu); } +/* + * Ensures all contexts with the same task_ctx_nr have the same + * pmu_cpu_context too. + */ +static void *find_pmu_context(int ctxn) +{ + struct pmu *pmu; + + if (ctxn < 0) + return NULL; + + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->task_ctx_nr == ctxn) + return pmu->pmu_cpu_context; + } + + return NULL; +} + +static void free_pmu_context(void * __percpu cpu_context) +{ + struct pmu *pmu; + + mutex_lock(&pmus_lock); + /* + * Like a real lame refcount. 
+ */ + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->pmu_cpu_context == cpu_context) + goto out; + } + + free_percpu(cpu_context); +out: + mutex_unlock(&pmus_lock); +} + int perf_pmu_register(struct pmu *pmu) { int cpu, ret; @@ -5052,6 +5143,10 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); + if (pmu->pmu_cpu_context) + goto got_cpu_context; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) goto free_pdc; @@ -5067,6 +5162,7 @@ int perf_pmu_register(struct pmu *pmu) cpuctx->timer.function = perf_event_context_tick; } +got_cpu_context: if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5114,7 +5210,7 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); - free_percpu(pmu->pmu_cpu_context); + free_pmu_context(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5628,16 +5724,13 @@ __perf_event_exit_task(struct perf_event *child_event, } } -/* - * When a child task exits, feed back event values to parent events. - */ -void perf_event_exit_task(struct task_struct *child) +static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *tmp; struct perf_event_context *child_ctx; unsigned long flags; - if (likely(!child->perf_event_ctxp)) { + if (likely(!child->perf_event_ctxp[ctxn])) { perf_event_task(child, NULL, 0); return; } @@ -5649,7 +5742,7 @@ void perf_event_exit_task(struct task_struct *child) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; __perf_event_task_sched_out(child_ctx); /* @@ -5658,7 +5751,7 @@ void perf_event_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ raw_spin_lock(&child_ctx->lock); - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -5711,6 +5804,17 @@ again: put_ctx(child_ctx); } +/* + * When a child task exits, feed back event values to parent events. + */ +void perf_event_exit_task(struct task_struct *child) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + perf_event_exit_task_context(child, ctxn); +} + static void perf_free_event(struct perf_event *event, struct perf_event_context *ctx) { @@ -5732,32 +5836,37 @@ static void perf_free_event(struct perf_event *event, /* * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. + * perf_event_init_task below, used by fork() in case of fail. 
*/ void perf_event_free_task(struct task_struct *task) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx; struct perf_event *event, *tmp; + int ctxn; - if (!ctx) - return; + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; - mutex_lock(&ctx->mutex); + mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, + group_entry) + perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) + goto again; - mutex_unlock(&ctx->mutex); + mutex_unlock(&ctx->mutex); - put_ctx(ctx); + put_ctx(ctx); + } } /* @@ -5863,17 +5972,18 @@ static int inherit_group(struct perf_event *parent_event, static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, - struct task_struct *child, + struct task_struct *child, int ctxn, int *inherited_all) { int ret; - struct perf_event_context *child_ctx = child->perf_event_ctxp; + struct perf_event_context *child_ctx; if (!event->attr.inherit) { *inherited_all = 0; return 0; } + child_ctx = child->perf_event_ctxp[ctxn]; if (!child_ctx) { /* * This is executed from the parent task context, so @@ -5886,7 +5996,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, if (!child_ctx) return -ENOMEM; - child->perf_event_ctxp = child_ctx; + child->perf_event_ctxp[ctxn] = child_ctx; } ret = inherit_group(event, parent, parent_ctx, @@ -5901,7 +6011,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, /* * Initialize the perf_event context in task_struct */ -int perf_event_init_task(struct task_struct *child) +int perf_event_init_context(struct task_struct *child, int ctxn) { struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; @@ -5910,19 +6020,19 @@ int perf_event_init_task(struct task_struct *child) int inherited_all = 1; int ret = 0; - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_event_ctxp)) + if (likely(!parent->perf_event_ctxp[ctxn])) return 0; /* * If the parent's context is a clone, pin it so it won't get * swapped under us. 
*/ - parent_ctx = perf_pin_task_context(parent); + parent_ctx = perf_pin_task_context(parent, ctxn); /* * No need to check if parent_ctx != NULL here; since we saw @@ -5942,20 +6052,20 @@ int perf_event_init_task(struct task_struct *child) * the list, not manipulating it: */ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { /* @@ -5984,6 +6094,22 @@ int perf_event_init_task(struct task_struct *child) return ret; } +/* + * Initialize the perf_event context in task_struct + */ +int perf_event_init_task(struct task_struct *child) +{ + int ctxn, ret; + + for_each_task_context_nr(ctxn) { + ret = perf_event_init_context(child, ctxn); + if (ret) + return ret; + } + + return 0; +} + static void __init perf_event_init_all_cpus(void) { struct swevent_htable *swhash; -- cgit v1.2.3 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (which are not) can lead to funny scheduling oddities. Giving them their own context solves this. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + kernel/hw_breakpoint.c | 2 ++ kernel/perf_event.c | 40 +++++++++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6e..c1173520f14d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f82..eb3c1ceec06e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 6f150095cafe..3b2aaffb65f0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -610,6 +610,8 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) } static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + .event_init = hw_breakpoint_event_init, .add = hw_breakpoint_add, .del = hw_breakpoint_del, 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7223ea875861..357ee8d5e8ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4709,6 +4709,8 @@ static int perf_swevent_init(struct perf_event *event) } static struct pmu perf_swevent = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -4800,6 +4802,8 @@ static int perf_tp_event_init(struct perf_event *event) } static struct pmu perf_tracepoint = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_tp_event_init, .add = perf_trace_add, .del = perf_trace_del, @@ -4988,6 +4992,8 @@ static int cpu_clock_event_init(struct perf_event *event) } static struct pmu perf_cpu_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -5063,6 +5069,8 @@ static int task_clock_event_init(struct perf_event *event) } static struct pmu perf_task_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -5490,6 +5498,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *ctx; struct file *event_file = NULL; struct file *group_file = NULL; + struct pmu *pmu; int event_fd; int fput_needed = 0; int err; @@ -5522,20 +5531,11 @@ SYSCALL_DEFINE5(perf_event_open, goto err_fd; } - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(event->pmu, pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_alloc; - } - if (group_fd != -1) { group_leader = perf_fget_light(group_fd, &fput_needed); if (IS_ERR(group_leader)) { err = PTR_ERR(group_leader); - goto err_context; + goto err_alloc; } group_file = group_leader->filp; if (flags & PERF_FLAG_FD_OUTPUT) @@ -5544,6 +5544,23 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } + /* + * Special case software events and allow them to be part of + * any hardware group. + */ + pmu = event->pmu; + if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) + pmu = group_leader->pmu; + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pmu, pid, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_group_fd; + } + /* * Look up the group leader (we will attach this event to it): */ @@ -5605,8 +5622,9 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - fput_light(group_file, fput_needed); put_ctx(ctx); +err_group_fd: + fput_light(group_file, fput_needed); err_alloc: free_event(event); err_fd: -- cgit v1.2.3 From 4e231c7962ce711c7d8c2a4dc23ecd1e8fc28363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2010 21:01:59 +0200 Subject: perf: Fix up delayed_put_task_struct() I missed a perf_event_ctxp user when converting it to an array. Pull this last user into perf_event.c as well and fix it up. 
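As an illustration of why such a user is easy to miss: once the field becomes an array, a bare pointer test like WARN_ON_ONCE(tsk->perf_event_ctxp) still compiles (the array decays to an always-true pointer), so every check has to be converted to a loop by hand. A minimal user-space sketch of the resulting pattern, with made-up names rather than the kernel code:

#include <stdio.h>

#define NR_CTX 2	/* stands in for perf_nr_task_contexts */

struct task {
	void *ctxp[NR_CTX];	/* was a single pointer before the conversion */
};

/* "Is any context still installed?" must now walk every slot; note that
 * "if (t->ctxp)" would still compile against the array and always be
 * true, which is exactly how such users slip through. */
static int task_has_ctx(const struct task *t)
{
	int n;

	for (n = 0; n < NR_CTX; n++)
		if (t->ctxp[n])
			return 1;
	return 0;
}

int main(void)
{
	struct task t = { .ctxp = { 0 } };

	printf("%d\n", task_has_ctx(&t));	/* prints 0: all slots clear */
	return 0;
}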
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ kernel/exit.c | 4 +--- kernel/perf_event.c | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c1173520f14d..93bf53aa50e5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -889,6 +889,7 @@ extern void perf_event_task_sched_out(struct task_struct *task, struct task_stru extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); @@ -1067,6 +1068,7 @@ perf_event_task_sched_out(struct task_struct *task, static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db28..e2bdf37f9fde 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_EVENTS - WARN_ON_ONCE(tsk->perf_event_ctxp); -#endif + perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9819a69a61a1..eaf1c5de6dcc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5893,6 +5893,14 @@ again: } } +void perf_event_delayed_put(struct task_struct *task) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); +} + /* * inherit a event from parent task to child task: */ -- cgit v1.2.3 From 3b8fad3e2f5f69bfd8e42d099ca8582fb2342edf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Sep 2010 14:26:00 +0200 Subject: irq: Fix circular headers dependency asm-generic/hardirq.h needs asm/irq.h, which might include linux/interrupt.h, as in the sparc32 case. At this point we need the generic irq_cpustat definitions, but those are included later in asm-generic/hardirq.h. So delay the inclusion of irq.h from asm-generic/hardirq.h a bit; it doesn't need to be included early. 
This fixes: include/linux/interrupt.h: In function '__raise_softirq_irqoff': include/linux/interrupt.h:414: error: implicit declaration of function 'local_softirq_pending' include/linux/interrupt.h:414: error: lvalue required as left operand of assignment Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Koki Sanagi Cc: mathieu.desnoyers@efficios.com Cc: rostedt@goodmis.org Cc: nhorman@tuxdriver.com Cc: scott.a.mcmillan@intel.com Cc: eric.dumazet@gmail.com Cc: kaneshige.kenji@jp.fujitsu.com Cc: davem@davemloft.net Cc: izumi.taku@jp.fujitsu.com Cc: kosaki.motohiro@jp.fujitsu.com LKML-Reference: <20100908122557.GA5310@nowhere> Signed-off-by: Ingo Molnar --- include/asm-generic/hardirq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5cc..04d0a977cd43 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ +#include #ifndef ack_bad_irq static inline void ack_bad_irq(unsigned int irq) -- cgit v1.2.3 From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 9 Sep 2010 16:37:24 -0700 Subject: mmc: avoid getting CID on SDIO-only cards The introduction of support for SD combo cards breaks the initialization of all CSR SDIO chips. The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR chips to be reset (this is non-standard behavior). When initializing an SDIO card, check for a combo card by using the memory present bit in the R4 response to IO_SEND_OP_COND (CMD5). This avoids the call to mmc_sd_get_cid() on an SDIO-only card. Signed-off-by: David Vrabel Acked-by: Michal Miroslaw Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio.c | 5 ++--- include/linux/mmc/sdio.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd2755e8d9a3..f332c52968b7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid); - - if (!err) { + if (ocr & R4_MEMORY_PRESENT + && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8faa6e37..245cdacee544 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type -- cgit v1.2.3 From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 9 Sep 2010 16:37:26 -0700 Subject: kfifo: add parenthesis for macro parameter reference Some macro parameter references inside the typeof() operator are not enclosed in parentheses. It is safer to add them. 
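As a minimal, user-space illustration of the precedence hazard being guarded against (illustrative code, not from the patch; the DOUBLE_* names are made up), a bare parameter reference lets operators in the argument rebind against operators in the macro body; the kfifo change applies the same defensive parenthesization to the parameter references inside typeof():

#include <stdio.h>

#define DOUBLE_UNSAFE(x)	(x * 2)		/* parameter used bare */
#define DOUBLE_SAFE(x)		((x) * 2)	/* parameter parenthesized */

int main(void)
{
	printf("%d\n", DOUBLE_UNSAFE(1 + 2));	/* expands to (1 + 2 * 2) == 5 */
	printf("%d\n", DOUBLE_SAFE(1 + 2));	/* expands to ((1 + 2) * 2) == 6 */
	return 0;
}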
Signed-off-by: Huang Ying Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f203f3e..62dbee554f60 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? 
\ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + 
typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ -- cgit v1.2.3 From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- kernel/cgroup.c | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..0c991023ee47 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. 
This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..c9483d8f6140 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task + * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3 From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Sep 2010 16:37:52 -0700 Subject: mm: fix swapin race condition The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We also have to ensure the swapcache isn't removed before we take the lock, as try_to_free_swap won't care about the page pin. One of the possible impacts of this patch is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed. This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma. [riel@redhat.com: changelog help] Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 20 +++++++++----------- mm/ksm.c | 3 --- mm/memory.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? 
*/ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, diff --git a/mm/ksm.c b/mm/ksm.c index e2ae00458320..b1873cf03ed9 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, { struct page *new_page; - unlock_page(page); /* any racers will COW it, not modify it */ - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, add_page_to_unevictable_list(new_page); } - page_cache_release(page); return new_page; } diff --git a/mm/memory.c b/mm/memory.c index 6b2ab1051851..71b161b73bb5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page; + struct page *page, *swapcache = NULL; swp_entry_t entry; pte_t pte; struct mem_cgroup *ptr = NULL; @@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - page = ksm_might_need_to_copy(page, vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; + /* + * Make sure try_to_free_swap didn't release the swapcache + * from under us. The page pin isn't enough to prevent that. + */ + if (unlikely(!PageSwapCache(page))) + goto out_page; + + if (ksm_might_need_to_copy(page, vma, address)) { + swapcache = page; + page = ksm_does_need_to_copy(page, vma, address); + + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; + } } if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { @@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); + if (swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check + * (to avoid false positives from pte_same). For + * further safety release the lock after the swap_free + * so that the swap count won't change under a + * parallel locked swapcache. 
+ */ + unlock_page(swapcache); + page_cache_release(swapcache); + } if (flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); @@ -2756,6 +2781,10 @@ out_page: unlock_page(page); out_release: page_cache_release(page); + if (swapcache) { + unlock_page(swapcache); + page_cache_release(swapcache); + } return ret; } -- cgit v1.2.3 From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001 From: Gregory Bean Date: Thu, 9 Sep 2010 16:38:02 -0700 Subject: gpio: sx150x: correct and refine reset-on-probe behavior Remove the arbitrary software-reset call from the device-probe method, because: - It is defective. To work correctly, it should be two byte writes, not a single word write. As it stands, it does nothing. - Some devices with sx150x expanders installed have their NRESET pins ganged on the same line, so resetting one causes the others to reset - not a nice thing to do arbitrarily! - The probe, usually taking place at boot, implies a recent hard-reset, so a software reset at this point is just a waste of energy anyway. Therefore, make it optional, defaulting to off, as this will match the common case of probing at powerup and also matches the current broken no-op behavior. Signed-off-by: Gregory Bean Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/sx150x.c | 26 +++++++++++++++++++++----- include/linux/i2c/sx150x.h | 4 ++++ 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c index b42f42ca70c3..823559ab0e24 100644 --- a/drivers/gpio/sx150x.c +++ b/drivers/gpio/sx150x.c @@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg) return err; } -static int sx150x_init_hw(struct sx150x_chip *chip, - struct sx150x_platform_data *pdata) +static int sx150x_reset(struct sx150x_chip *chip) { - int err = 0; + int err; - err = i2c_smbus_write_word_data(chip->client, + err = i2c_smbus_write_byte_data(chip->client, chip->dev_cfg->reg_reset, - 0x3412); + 0x12); if (err < 0) return err; + err = i2c_smbus_write_byte_data(chip->client, + chip->dev_cfg->reg_reset, + 0x34); + return err; +} + +static int sx150x_init_hw(struct sx150x_chip *chip, + struct sx150x_platform_data *pdata) +{ + int err = 0; + + if (pdata->reset_during_probe) { + err = sx150x_reset(chip); + if (err < 0) + return err; + } + err = sx150x_i2c_write(chip->client, chip->dev_cfg->reg_misc, 0x01); diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049cb9ba5..52baa79d69a7 100644 --- a/include/linux/i2c/sx150x.h +++ b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3 From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 9 Sep 2010 16:38:03 -0700 Subject: gpio: doc updates There's been some recent confusion about error checking GPIO numbers. 
Briefly, it should be handled mostly during setup, when gpio_request() is called, and NEVER by expecting gpio_is_valid to report more than never-usable GPIO numbers. [akpm@linux-foundation.org: terminate unterminated comment] Signed-off-by: David Brownell Cc: Eric Miao Cc: Ryan Mallon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 22 ++++++++++++++-------- include/asm-generic/gpio.h | 14 +++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index d96a6dba5748..9633da01ff46 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. If you want to initialize a structure with an invalid GPIO number, use some negative number (perhaps "-EINVAL"); that will never be valid. To -test if a number could reference a GPIO, you may use this predicate: +test if such number from such a structure could reference a GPIO, you +may use this predicate: int gpio_is_valid(int number); A number that's not valid will be rejected by calls which may request or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but unused on a given board. - -Whether a platform supports multiple GPIO controllers is currently a -platform-specific implementation issue. +example, a number might be valid but temporarily unused on a given board. +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. Using GPIOs ----------- @@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its <asm/gpio.h> includes and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). -They may also want to provide a custom value for ARCH_NR_GPIOS. -ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders. + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled into the kernel on that architecture. -ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user can enable it and build it into the kernel optionally. If neither of these options are selected, the platform does not support diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf80b06..8ca18e26d7e3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. 
*/ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } -- cgit v1.2.3 From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:07 -0700 Subject: swap: revert special hibernation allocation Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf "hibernation: freeze swap at hibernation". It complicated matters by adding a second swap allocation path, just for hibernation; without in any way fixing the issue that it was intended to address - page reclaim after fixing the hibernation image might free swap from a page already imaged as swapcache, letting its swap be reallocated to store a different page of the image: resulting in data corruption if the imaged page were freed as clean then swapped back in. Pages freed to si->swap_map were still in danger of being reallocated by the alternative allocation path. I guess it inadvertently fixed slow SSD swap allocation for hibernation, as reported by Nigel Cunningham: by missing out the discards that occur on the usual swap allocation path; but that was unintentional, and needs a separate fix. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: "Rafael J. Wysocki" Cc: Ondrej Zary Cc: Andrea Gelmini Cc: Balbir Singh Cc: Andrea Arcangeli Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +---- kernel/power/hibernate.c | 1 - kernel/power/snapshot.c | 1 - kernel/power/swap.c | 6 ++-- mm/swapfile.c | 94 ++++++++++++------------------------------------ 5 files changed, 26 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a11b73..bf4eb62506db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -315,6 +315,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c77963938bca..8dc31e02ae12 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5e7edfb05e66..f6cd6faf84fd 100644 --- 
a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,7 +1086,6 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; - hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5d0059eed3e4..e6a5bdf61a37 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_for_hibernation(swap)); + offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f3f9c59a73a..f08d165871b3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,8 +47,6 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; -static bool swap_for_hibernation; - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; - if (swap_for_hibernation) - goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -487,6 +483,28 @@ noswap: return (swp_entry_t) {0}; } +/* The only caller of this function is now susupend routine */ +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info[type]; + if (si && (si->flags & SWP_WRITEOK)) { + nr_swap_pages--; + /* This is called for allocating swap entry, not cache */ + offset = scan_swap_map(si, 1); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION - -static pgoff_t hibernation_offset[MAX_SWAPFILES]; -/* - * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, - * saved swap_map[] image to the disk will be an incomplete because it's - * changing without synchronization with hibernation snap shot. - * At resume, we just make swap_for_hibernation=false. We can forget - * used maps easily. - */ -void hibernation_freeze_swap(void) -{ - int i; - - spin_lock(&swap_lock); - - printk(KERN_INFO "PM: Freeze Swap\n"); - swap_for_hibernation = true; - for (i = 0; i < MAX_SWAPFILES; i++) - hibernation_offset[i] = 1; - spin_unlock(&swap_lock); -} - -void hibernation_thaw_swap(void) -{ - spin_lock(&swap_lock); - if (swap_for_hibernation) { - printk(KERN_INFO "PM: Thaw Swap\n"); - swap_for_hibernation = false; - } - spin_unlock(&swap_lock); -} - -/* - * Because updateing swap_map[] can make not-saved-status-change, - * we use our own easy allocator. - * Please see kernel/power/swap.c, Used swaps are recorded into - * RB-tree. 
- */ -swp_entry_t get_swap_for_hibernation(int type) -{ - pgoff_t off; - swp_entry_t val = {0}; - struct swap_info_struct *si; - - spin_lock(&swap_lock); - - si = swap_info[type]; - if (!si || !(si->flags & SWP_WRITEOK)) - goto done; - - for (off = hibernation_offset[type]; off < si->max; ++off) { - if (!si->swap_map[off]) - break; - } - if (off < si->max) { - val = swp_entry(type, off); - hibernation_offset[type] = off + 1; - } -done: - spin_unlock(&swap_lock); - return val; -} - -void swap_free_for_hibernation(swp_entry_t ent) -{ - /* Nothing to do */ -} - /* * Find the swap type that corresponds to given device (if any). * -- cgit v1.2.3 From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:11 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs show a shift since I last tested in December: in part because of firmware updates, in part because of the necessary move from barriers to awaiting completion at the block layer. While discard at swapon still shows as slightly beneficial on both, discarding a 1MB swap cluster when allocating is now disadvantageous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV). Surrender: discard as presently implemented is more hindrance than help for swap; but might prove useful on other devices, or with improvements. So continue to do the discard at swapon, but make discard while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using only the lower 16 bits of int flags). We can add a --discard or -d to swapon(8), and a "discard" to swap in /etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Nigel Cunningham Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Cc: "Martin K. Petersen" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- mm/swapfile.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bf4eb62506db..7cdd63366f88 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? 
*/ - SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 1894dead0b58..7c703ff2f36f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (random32() % p->highest_bit); } - if (discard_swap(p) == 0) + if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD)) p->flags |= SWP_DISCARDABLE; } -- cgit v1.2.3 From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Sep 2010 16:38:17 -0700 Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than the number of pages actually free in the buddy lists, the VM can allocate pages below the min watermark, at worst reducing the real number of free pages to zero. Even if the OOM killer kills some victim to free memory, it may not free memory if the exit path requires a new page, resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake. 
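To make the drift problem concrete outside the kernel, here is a minimal standalone C sketch of a cheap counter read versus a snapshot read that folds the pending per-cpu deltas back in; the structure mirrors the zone_page_state()/zone_page_state_snapshot() pair in the patch below, but every name and number here is illustrative, not kernel code:

#include <stdio.h>

#define NCPUS 4

static long vm_stat_free = 1000;   /* global NR_FREE_PAGES estimate */
static long vm_stat_diff[NCPUS] = { -300, -250, -200, -150 };
                                   /* undrained per-cpu deltas */

/* cheap read, as used for ordinary watermark checks */
static long read_free_estimate(void)
{
    return vm_stat_free;
}

/* snapshot read: fold in the pending deltas; still racy, but far closer */
static long read_free_snapshot(void)
{
    long x = vm_stat_free;
    int cpu;

    for (cpu = 0; cpu < NCPUS; cpu++)
        x += vm_stat_diff[cpu];
    return x < 0 ? 0 : x;
}

int main(void)
{
    printf("estimate %ld, snapshot %ld\n",
           read_free_estimate(), read_free_snapshot());
    return 0;
}

With the deltas above, the cheap read reports 1000 free pages while only 100 really remain; it is exactly this gap that can let allocations slip below the min watermark, which is why the patch switches to the snapshot read while kswapd is awake.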
Signed-off-by: Christoph Lameter Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++++++++ include/linux/vmstat.h | 22 ++++++++++++++++++++++ mm/mmzone.c | 21 +++++++++++++++++++++ mm/page_alloc.c | 4 ++-- mm/vmstat.c | 15 ++++++++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e6e62648a4d..3984c4eb41fd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -283,6 +283,13 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +#ifdef CONFIG_SMP +unsigned long zone_nr_free_pages(struct zone *zone); +#else +#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) +#endif /* CONFIG_SMP */ + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7f43ccdc1d38..eaaea37b3b75 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +/* + * More accurate version that also considers the currently pending + * deltas. For that we need to loop over all cpus to find the current + * deltas. There is no synchronization so the result cannot be + * exactly accurate either. + */ +static inline unsigned long zone_page_state_snapshot(struct zone *zone, + enum zone_stat_item item) +{ + long x = atomic_long_read(&zone->vm_stat[item]); + +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +#endif + return x; +} + extern unsigned long global_reclaimable_pages(void); extern unsigned long zone_reclaimable_pages(struct zone *zone); diff --git a/mm/mmzone.c b/mm/mmzone.c index f5b7d1760213..e35bfb82c855 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +#ifdef CONFIG_SMP +/* Called when a more accurate view of NR_FREE_PAGES is needed */ +unsigned long zone_nr_free_pages(struct zone *zone) +{ + unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); + + /* + * While kswapd is awake, it is considered the zone is under some + * memory pressure. Under pressure, there is a risk that + * per-cpu-counter-drift will allow the min watermark to be breached + * potentially causing a live-lock. 
While kswapd is awake and + * free pages are low, get a better estimate for free pages + */ + if (nr_free_pages < zone->percpu_drift_mark && + !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) + return zone_page_state_snapshot(zone, NR_FREE_PAGES); + + return nr_free_pages; +} +#endif /* CONFIG_SMP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 452e2ba06c7c..b2d21e06d45d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1462,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, { /* free_pages my go negative - that's OK */ long min = mark; - long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; + long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -2424,7 +2424,7 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone_nr_free_pages(zone)), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/vmstat.c b/mm/vmstat.c index a8d6b59e609a..355a9e669aaa 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void) int threshold; for_each_populated_zone(zone) { + unsigned long max_drift, tolerate_drift; + threshold = calculate_threshold(zone); for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; + + /* + * Only set percpu_drift_mark if there is a danger that + * NR_FREE_PAGES reports the low watermark is ok when in fact + * the min watermark could be breached by an allocation + */ + tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); + max_drift = num_online_cpus() * threshold; + if (max_drift > tolerate_drift) + zone->percpu_drift_mark = high_wmark_pages(zone) + + max_drift; } } @@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n scanned %lu" "\n spanned %lu" "\n present %lu", - zone_page_state(zone, NR_FREE_PAGES), + zone_nr_free_pages(zone), min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), -- cgit v1.2.3 From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Sep 2010 14:05:31 +0200 Subject: libata: skip EH autopsy and recovery during suspend For some mysterious reason, certain hardware reacts badly to usual EH actions while the system is going for suspend. As the devices won't be needed until the system is resumed, ask EH to skip usual autopsy and recovery and proceed directly to suspend. Signed-off-by: Tejun Heo Tested-by: Stephan Diestelhorst Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 14 +++++++++++++- drivers/ata/libata-eh.c | 4 ++++ include/linux/libata.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c035b3d041ee..932eaee50245 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5418,6 +5418,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, */ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { + unsigned int ehi_flags = ATA_EHI_QUIET; int rc; /* @@ -5426,7 +5427,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) */ ata_lpm_enable(host); - rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); + /* + * On some hardware, device fails to respond after spun down + * for suspend. 
As the device won't be used before being + * resumed, we don't need to touch the device. Ask EH to skip + * the usual stuff and proceed directly to suspend. + * + * http://thread.gmane.org/gmane.linux.ide/46764 + */ + if (mesg.event == PM_EVENT_SUSPEND) + ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY; + + rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1); if (rc == 0) host->dev->power.power_state = mesg; return rc; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c9ae299b8342..e48302eae55f 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3235,6 +3235,10 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; + /* skip if explicitly requested */ + if (ehc->i.flags & ATA_EHI_NO_RECOVERY) + return 1; + /* thaw frozen port and recover failed devices */ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18a0f86..7de282d8bedf 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -335,6 +335,7 @@ enum { ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ + ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ -- cgit v1.2.3 From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 31 Aug 2010 16:20:36 -0700 Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling. Keep track of the link on which the current request is in progress. This allows supporting links behind a port multiplier. Not all of libata-sff is PMP compliant. The code for native BMDMA controllers does not take PMP into account. Tested on Marvell 7042 and Sil7526. 
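As a rough sketch of why the queued task must remember its link (the types below are simplified stand-ins, not the real libata structures): with a port multiplier, one port fans out to several links, so deferred PIO work queued against the port has to record which link it serves and must not be silently claimed by a second link while still pending, mirroring the WARN_ON in ata_sff_queue_pio_task() in the diff that follows:

#include <stdio.h>

struct link { int id; };

struct port {
    struct link *sff_pio_task_link; /* link the pending PIO task serves */
};

static void queue_pio_task(struct port *ap, struct link *link)
{
    /* a pending task's link must not be overwritten by another link */
    if (ap->sff_pio_task_link && ap->sff_pio_task_link != link) {
        printf("bug: task already queued for link %d\n",
               ap->sff_pio_task_link->id);
        return;
    }
    ap->sff_pio_task_link = link;
    printf("PIO task queued for link %d\n", link->id);
}

int main(void)
{
    struct link l0 = { 0 }, l1 = { 1 };
    struct port ap = { NULL };

    queue_pio_task(&ap, &l0);
    queue_pio_task(&ap, &l1); /* flagged: l0's task is still pending */
    return 0;
}

The worker then reads the recorded link instead of assuming ap->link, and clears it before moving the state machine, since completing one command may immediately queue another on a different link.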
Signed-off-by: Gwendal Grignou Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 38 ++++++++++++++++++++++++++++---------- drivers/ata/sata_mv.c | 2 +- include/linux/libata.h | 3 ++- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index dee3c2c52562..e30c537cce32 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1045,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { - struct ata_eh_info *ehi = &ap->link.eh_info; + struct ata_link *link = qc->dev->link; + struct ata_eh_info *ehi = &link->eh_info; unsigned long flags = 0; int poll_next; @@ -1301,8 +1302,14 @@ fsm_start: } EXPORT_SYMBOL_GPL(ata_sff_hsm_move); -void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay) +void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) { + struct ata_port *ap = link->ap; + + WARN_ON((ap->sff_pio_task_link != NULL) && + (ap->sff_pio_task_link != link)); + ap->sff_pio_task_link = link; + /* may fail if ata_sff_flush_pio_task() in progress */ queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, msecs_to_jiffies(delay)); @@ -1324,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, sff_pio_task.work); + struct ata_link *link = ap->sff_pio_task_link; struct ata_queued_cmd *qc; u8 status; int poll_next; + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (!qc) + qc = ata_qc_from_tag(ap, link->active_tag); + if (!qc) { + ap->sff_pio_task_link = NULL; return; + } fsm_start: WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE); @@ -1348,11 +1359,16 @@ fsm_start: msleep(2); status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE); + ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); return; } } + /* + * hsm_move() may trigger another command to be processed. + * clean the link beforehand. + */ + ap->sff_pio_task_link = NULL; /* move the HSM */ poll_next = ata_sff_hsm_move(ap, qc, status, 1); @@ -1379,6 +1395,7 @@ fsm_start: unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* Use polling pio if the LLD doesn't handle * interrupt driven pio and atapi CDB interrupt. @@ -1399,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; @@ -1412,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ ap->hsm_task_state = HSM_ST_FIRST; - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* always send first data block using the * ata_sff_pio_task() codepath. @@ -1422,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* if polling, ata_sff_pio_task() handles the * rest. 
otherwise, interrupt handler takes @@ -1444,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) || (qc->tf.flags & ATA_TFLAG_POLLING)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -2737,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep); unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* defer PIO handling to sff_qc_issue */ if (!ata_is_dma(qc->tf.protocol)) @@ -2765,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 81982594a014..a9fd9709c262 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc) } if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 7de282d8bedf..45fb2967b66d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -724,6 +724,7 @@ struct ata_port { struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ + struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ @@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); -extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay); +extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, -- cgit v1.2.3 From 8613e4c2872a87cc309a42de2c7091744dc54d0e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 9 Sep 2010 21:54:22 -0700 Subject: Input: add support for large scancodes Several devices use a high number of bits for scancodes. One important group is the Remote Controllers. Some new protocols like RC-6 define a scancode space of 64 bits. The current EVIO[CS]GKEYCODE ioctls allow replacing the scancode/keycode translation tables, but they are limited to 32-bit scancodes. Also, if userspace wants to clean the existing table, replacing it by a new one, it needs to run a loop calling the ioctls over the entire sparse scancode space. To solve those problems, this patch extends the ioctls to allow drivers to handle scancodes up to 32 bytes long (the length could be extended in the future should such need arise) and allow userspace to query and set scancode to keycode mappings not only by scancode but also by index. Compatibility code was also added to handle the old format of EVIO[CS]GKEYCODE ioctls. Folded fixes by: - Dan Carpenter: locking fixes for the original implementation - Jarod Wilson: fix crash when setting keycode and wiring up get/set handlers in original implementation. 
- Dmitry Torokhov: rework to consolidate old and new scancode handling, provide options to act either by index or scancode. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Dan Carpenter Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 31 ++++++-- drivers/input/evdev.c | 100 ++++++++++++++++++++----- drivers/input/input.c | 192 ++++++++++++++++++++++++++++++++++++------------ include/linux/input.h | 55 +++++++++++--- 4 files changed, 292 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index a7ca75212bfe..e95d7876ca6b 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -175,8 +175,7 @@ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); */ struct getset_keycode_data { - unsigned int scancode; - unsigned int keycode; + struct input_keymap_entry ke; int error; }; @@ -184,32 +183,50 @@ static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_get_keycode(handle->dev, d->scancode, &d->keycode); + d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } int getkeycode(unsigned int scancode) { - struct getset_keycode_data d = { scancode, 0, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = 0, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); - return d.error ?: d.keycode; + return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_set_keycode(handle->dev, d->scancode, d->keycode); + d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } int setkeycode(unsigned int scancode, unsigned int keycode) { - struct getset_keycode_data d = { scancode, keycode, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = keycode, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..1ce9bf663206 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -534,6 +534,80 @@ static int handle_eviocgbit(struct input_dev *dev, } #undef OLD_KEY_MAX +static int evdev_handle_get_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + int error; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (put_user(ke.keycode, ip + 1)) + return -EFAULT; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (copy_to_user(p, &ke, size)) + return -EFAULT; + } + return 0; +} + +static int evdev_handle_set_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned 
int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + if (get_user(ke.keycode, ip + 1)) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + if (ke.len > sizeof(ke.scancode)) + return -EINVAL; + } + + return input_set_keycode(dev, &ke); +} + static long evdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { @@ -580,25 +654,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return 0; - case EVIOCGKEYCODE: - if (get_user(t, ip)) - return -EFAULT; - - error = input_get_keycode(dev, t, &v); - if (error) - return error; - - if (put_user(v, ip + 1)) - return -EFAULT; - - return 0; - - case EVIOCSKEYCODE: - if (get_user(t, ip) || get_user(v, ip + 1)) - return -EFAULT; - - return input_set_keycode(dev, t, v); - case EVIOCRMFF: return input_ff_erase(dev, (int)(unsigned long) p, file); @@ -620,7 +675,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, /* Now check variable-length commands */ #define EVIOC_MASK_SIZE(nr) ((nr) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) - switch (EVIOC_MASK_SIZE(cmd)) { case EVIOCGKEY(0): @@ -654,6 +708,12 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return error; + + case EVIOC_MASK_SIZE(EVIOCGKEYCODE): + return evdev_handle_get_keycode(dev, p, size); + + case EVIOC_MASK_SIZE(EVIOCSKEYCODE): + return evdev_handle_set_keycode(dev, p, size); } /* Multi-number variable-length handlers */ diff --git a/drivers/input/input.c b/drivers/input/input.c index acb3c8095c65..832771e73663 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -634,78 +634,141 @@ static void input_disconnect_device(struct input_dev *dev) spin_unlock_irq(&dev->event_lock); } -static int input_fetch_keycode(struct input_dev *dev, int scancode) +/** + * input_scancode_to_scalar() - converts scancode in &struct input_keymap_entry + * @ke: keymap entry containing scancode to be converted. + * @scancode: pointer to the location where converted scancode should + * be stored. + * + * This function is used to convert scancode stored in &struct keymap_entry + * into scalar form understood by legacy keymap handling methods. These + * methods expect scancodes to be represented as 'unsigned int'. + */ +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode) +{ + switch (ke->len) { + case 1: + *scancode = *((u8 *)ke->scancode); + break; + + case 2: + *scancode = *((u16 *)ke->scancode); + break; + + case 4: + *scancode = *((u32 *)ke->scancode); + break; + + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(input_scancode_to_scalar); + +/* + * Those routines handle the default case where no [gs]etkeycode() is + * defined. In this case, an array indexed by the scancode is used. 
+ */ + +static unsigned int input_fetch_keycode(struct input_dev *dev, + unsigned int index) { switch (dev->keycodesize) { - case 1: - return ((u8 *)dev->keycode)[scancode]; + case 1: + return ((u8 *)dev->keycode)[index]; - case 2: - return ((u16 *)dev->keycode)[scancode]; + case 2: + return ((u16 *)dev->keycode)[index]; - default: - return ((u32 *)dev->keycode)[scancode]; + default: + return ((u32 *)dev->keycode)[index]; } } static int input_default_getkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int *keycode) + struct input_keymap_entry *ke) { + unsigned int index; + int error; + if (!dev->keycodesize) return -EINVAL; - if (scancode >= dev->keycodemax) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) + index = ke->index; + else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - *keycode = input_fetch_keycode(dev, scancode); + ke->keycode = input_fetch_keycode(dev, index); + ke->index = index; + ke->len = sizeof(index); + memcpy(ke->scancode, &index, sizeof(index)); return 0; } static int input_default_setkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int old_keycode; + unsigned int index; + int error; int i; - if (scancode >= dev->keycodemax) + if (!dev->keycodesize) return -EINVAL; - if (!dev->keycodesize) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - if (dev->keycodesize < sizeof(keycode) && (keycode >> (dev->keycodesize * 8))) + if (dev->keycodesize < sizeof(dev->keycode) && + (ke->keycode >> (dev->keycodesize * 8))) return -EINVAL; switch (dev->keycodesize) { case 1: { u8 *k = (u8 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } case 2: { u16 *k = (u16 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } default: { u32 *k = (u32 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } } - __clear_bit(old_keycode, dev->keybit); - __set_bit(keycode, dev->keybit); + __clear_bit(*old_keycode, dev->keybit); + __set_bit(ke->keycode, dev->keybit); for (i = 0; i < dev->keycodemax; i++) { - if (input_fetch_keycode(dev, i) == old_keycode) { - __set_bit(old_keycode, dev->keybit); + if (input_fetch_keycode(dev, i) == *old_keycode) { + __set_bit(*old_keycode, dev->keybit); break; /* Setting the bit twice is useless, so break */ } } @@ -716,53 +779,86 @@ static int input_default_setkeycode(struct input_dev *dev, /** * input_get_keycode - retrieve keycode currently mapped to a given scancode * @dev: input device which keymap is being queried - * @scancode: scancode (or its equivalent for device in question) for which - * keycode is needed - * @keycode: result + * @ke: keymap entry * * This function should be called by anyone interested in retrieving current - * keymap. Presently keyboard and evdev handlers use it. + * keymap. Presently evdev handlers use it. 
*/ -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned long flags; int retval; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, keycode); - spin_unlock_irqrestore(&dev->event_lock, flags); + if (dev->getkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + u32 scancode = ke->index; + + memcpy(ke->scancode, &scancode, sizeof(scancode)); + ke->len = sizeof(scancode); + retval = dev->getkeycode(dev, scancode, &ke->keycode); + } else { + retval = dev->getkeycode_new(dev, ke); + } + + spin_unlock_irqrestore(&dev->event_lock, flags); return retval; } EXPORT_SYMBOL(input_get_keycode); /** - * input_get_keycode - assign new keycode to a given scancode + * input_set_keycode - attribute a keycode to a given scancode * @dev: input device which keymap is being updated - * @scancode: scancode (or its equivalent for device in question) - * @keycode: new keycode to be assigned to the scancode + * @ke: new keymap entry * * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke) { unsigned long flags; unsigned int old_keycode; int retval; - if (keycode > KEY_MAX) + if (ke->keycode > KEY_MAX) return -EINVAL; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, &old_keycode); - if (retval) - goto out; + if (dev->setkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + unsigned int scancode; + + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + /* + * We need to know the old scancode, in order to generate a + * keyup effect, if the set operation happens successfully + */ + if (!dev->getkeycode) { + retval = -EINVAL; + goto out; + } + + retval = dev->getkeycode(dev, scancode, &old_keycode); + if (retval) + goto out; + + retval = dev->setkeycode(dev, scancode, ke->keycode); + } else { + retval = dev->setkeycode_new(dev, ke, &old_keycode); + } - retval = dev->setkeycode(dev, scancode, keycode); if (retval) goto out; @@ -1759,11 +1855,11 @@ int input_register_device(struct input_dev *dev) dev->rep[REP_PERIOD] = 33; } - if (!dev->getkeycode) - dev->getkeycode = input_default_getkeycode; + if (!dev->getkeycode && !dev->getkeycode_new) + dev->getkeycode_new = input_default_getkeycode; - if (!dev->setkeycode) - dev->setkeycode = input_default_setkeycode; + if (!dev->setkeycode && !dev->setkeycode_new) + dev->setkeycode_new = input_default_setkeycode; dev_set_name(&dev->dev, "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1); diff --git a/include/linux/input.h b/include/linux/input.h index 789265123531..0057698fd975 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -34,7 +34,7 @@ struct input_event { * Protocol version. */ -#define EV_VERSION 0x010000 +#define EV_VERSION 0x010001 /* * IOCTLs (0x00 - 0x7f) @@ -56,12 +56,37 @@ struct input_absinfo { __s32 resolution; }; +/** + * struct input_keymap_entry - used by EVIOCGKEYCODE/EVIOCSKEYCODE ioctls + * @scancode: scancode represented in machine-endian form. + * @len: length of the scancode that resides in @scancode buffer. 
+ * @index: index in the keymap, may be used instead of scancode + * @flags: allows to specify how kernel should handle the request. For + * example, setting INPUT_KEYMAP_BY_INDEX flag indicates that kernel + * should perform lookup in keymap by @index instead of @scancode + * @keycode: key code assigned to this scancode + * + * The structure is used to retrieve and modify keymap data. Users have + * option of performing lookup either by @scancode itself or by @index + * in keymap entry. EVIOCGKEYCODE will also return scancode or index + * (depending on which element was used to perform lookup). + */ +struct input_keymap_entry { +#define INPUT_KEYMAP_BY_INDEX (1 << 0) + __u8 flags; + __u8 len; + __u16 index; + __u32 keycode; + __u8 scancode[32]; +}; + #define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ #define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ #define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ #define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ + +#define EVIOCGKEYCODE _IOR('E', 0x04, struct input_keymap_entry) /* get keycode */ +#define EVIOCSKEYCODE _IOW('E', 0x04, struct input_keymap_entry) /* set keycode */ #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ @@ -73,8 +98,8 @@ struct input_absinfo { #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ @@ -1088,13 +1113,13 @@ struct input_mt_slot { * @keycodemax: size of keycode table * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device + * @getkeycode: optional legacy method to retrieve current keymap. * @setkeycode: optional method to alter current keymap, used to implement * sparse keymaps. If not supplied default mechanism will be used. * The method is being called while holding event_lock and thus must * not sleep - * @getkeycode: optional method to retrieve current keymap. If not supplied - * default mechanism will be used. 
The method is being called while - * holding event_lock and thus must not sleep + * @getkeycode_new: transition method + * @setkeycode_new: transition method * @ff: force feedback structure associated with the device if device * supports force feedback effects * @repeat_key: stores key code of the last key pressed; used to implement @@ -1168,10 +1193,16 @@ struct input_dev { unsigned int keycodemax; unsigned int keycodesize; void *keycode; + int (*setkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int keycode); int (*getkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int *keycode); + int (*setkeycode_new)(struct input_dev *dev, + const struct input_keymap_entry *ke, + unsigned int *old_keycode); + int (*getkeycode_new)(struct input_dev *dev, + struct input_keymap_entry *ke); struct ff_device *ff; @@ -1478,10 +1509,12 @@ INPUT_GENERATE_ABS_ACCESSORS(fuzz, fuzz) INPUT_GENERATE_ABS_ACCESSORS(flat, flat) INPUT_GENERATE_ABS_ACCESSORS(res, resolution) -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode); +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode); + +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode); + const struct input_keymap_entry *ke); extern struct class input_class; -- cgit v1.2.3 From 9f470095068e415658ccc6977cf4b3f5be418526 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2010 21:59:11 -0700 Subject: Input: media/IR - switch to using new keycode interface Switch the code to use the new style of getkeycode and setkeycode methods to allow retrieving and setting keycodes not only by their scancodes but also by index. Acked-by: Mauro Carvalho Chehab Signed-off-by: Dmitry Torokhov --- drivers/media/IR/ir-keytable.c | 393 +++++++++++++++++++++++++++-------------- include/media/rc-map.h | 2 +- 2 files changed, 263 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/drivers/media/IR/ir-keytable.c b/drivers/media/IR/ir-keytable.c index 7e82a9df726b..5ca36a42f019 100644 --- a/drivers/media/IR/ir-keytable.c +++ b/drivers/media/IR/ir-keytable.c @@ -24,15 +24,57 @@ /* FIXME: IR_KEYPRESS_TIMEOUT should be protocol specific */ #define IR_KEYPRESS_TIMEOUT 250 +/** + * ir_create_table() - initializes a scancode table + * @rc_tab: the ir_scancode_table to initialize + * @name: name to assign to the table + * @ir_type: ir type to assign to the new table + * @size: initial size of the table + * @return: zero on success or a negative error code + * + * This routine will initialize the ir_scancode_table and will allocate + * memory to hold at least the specified number of elements. + */ +static int ir_create_table(struct ir_scancode_table *rc_tab, + const char *name, u64 ir_type, size_t size) +{ + rc_tab->name = name; + rc_tab->ir_type = ir_type; + rc_tab->alloc = roundup_pow_of_two(size * sizeof(struct ir_scancode)); + rc_tab->size = rc_tab->alloc / sizeof(struct ir_scancode); + rc_tab->scan = kmalloc(rc_tab->alloc, GFP_KERNEL); + if (!rc_tab->scan) + return -ENOMEM; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); + return 0; +} + +/** + * ir_free_table() - frees memory allocated by a scancode table + * @rc_tab: the table whose mappings need to be freed + * + * This routine will free the memory allocated for key mappings used by the + * given scancode table.
+ */ +static void ir_free_table(struct ir_scancode_table *rc_tab) +{ + rc_tab->size = 0; + kfree(rc_tab->scan); + rc_tab->scan = NULL; +} + /** * ir_resize_table() - resizes a scancode table if necessary * @rc_tab: the ir_scancode_table to resize + * @gfp_flags: gfp flags to use when allocating memory * @return: zero on success or a negative error code * * This routine will shrink the ir_scancode_table if it has lots of * unused entries and grow it if it is full. */ -static int ir_resize_table(struct ir_scancode_table *rc_tab) +static int ir_resize_table(struct ir_scancode_table *rc_tab, gfp_t gfp_flags) { unsigned int oldalloc = rc_tab->alloc; unsigned int newalloc = oldalloc; @@ -57,7 +99,7 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) if (newalloc == oldalloc) return 0; - newscan = kmalloc(newalloc, GFP_ATOMIC); + newscan = kmalloc(newalloc, gfp_flags); if (!newscan) { IR_dprintk(1, "Failed to kmalloc %u bytes\n", newalloc); return -ENOMEM; @@ -72,26 +114,78 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) } /** - * ir_do_setkeycode() - internal function to set a keycode in the - * scancode->keycode table + * ir_update_mapping() - set a keycode in the scancode->keycode table * @dev: the struct input_dev device descriptor - * @rc_tab: the struct ir_scancode_table to set the keycode in - * @scancode: the scancode for the ir command - * @keycode: the keycode for the ir command - * @resize: whether the keytable may be shrunk - * @return: -EINVAL if the keycode could not be inserted, otherwise zero. + * @rc_tab: scancode table to be adjusted + * @index: index of the mapping that needs to be updated + * @new_keycode: the desired keycode + * @return: previous keycode assigned to the mapping + * + * This routine is used to update the scancode->keycode mapping at the given + * position. + */ +static unsigned int ir_update_mapping(struct input_dev *dev, + struct ir_scancode_table *rc_tab, + unsigned int index, + unsigned int new_keycode) +{ + int old_keycode = rc_tab->scan[index].keycode; + int i; + + /* Did the user wish to remove the mapping? */ + if (new_keycode == KEY_RESERVED || new_keycode == KEY_UNKNOWN) { + IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", + index, rc_tab->scan[index].scancode); + rc_tab->len--; + memmove(&rc_tab->scan[index], &rc_tab->scan[index + 1], + (rc_tab->len - index) * sizeof(struct ir_scancode)); + } else { + IR_dprintk(1, "#%d: %s scan 0x%04x with key 0x%04x\n", + index, + old_keycode == KEY_RESERVED ? "New" : "Replacing", + rc_tab->scan[index].scancode, new_keycode); + rc_tab->scan[index].keycode = new_keycode; + __set_bit(new_keycode, dev->keybit); + } + + if (old_keycode != KEY_RESERVED) { + /* A previous mapping was updated... */ + __clear_bit(old_keycode, dev->keybit); + /* ... but another scancode might use the same keycode */ + for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].keycode == old_keycode) { + __set_bit(old_keycode, dev->keybit); + break; + } + } + + /* Possibly shrink the keytable, failure is not a problem */ + ir_resize_table(rc_tab, GFP_ATOMIC); + } + + return old_keycode; +} + +/** + * ir_establish_scancode() - locate a scancode in the scancode->keycode table + * @ir_dev: the struct ir_input_dev device descriptor + * @rc_tab: scancode table to be searched + * @scancode: the desired scancode + * @resize: controls whether we are allowed to resize the table to + * accommodate scancodes not yet present + * @return: index of the mapping containing scancode in question + * or -1U in case of failure.
* - * This routine is used internally to manipulate the scancode->keycode table. - * The caller has to hold @rc_tab->lock. + * This routine is used to locate given scancode in ir_scancode_table. + * If scancode is not yet present the routine will allocate a new slot + * for it. */ -static int ir_do_setkeycode(struct input_dev *dev, - struct ir_scancode_table *rc_tab, - unsigned scancode, unsigned keycode, - bool resize) +static unsigned int ir_establish_scancode(struct ir_input_dev *ir_dev, + struct ir_scancode_table *rc_tab, + unsigned int scancode, + bool resize) { unsigned int i; - int old_keycode = KEY_RESERVED; - struct ir_input_dev *ir_dev = input_get_drvdata(dev); /* * Unfortunately, some hardware-based IR decoders don't provide @@ -100,65 +194,34 @@ static int ir_do_setkeycode(struct input_dev *dev, * the provided IR with another one, it is needed to allow loading * IR tables from other remotes. So, */ - if (ir_dev->props && ir_dev->props->scanmask) { + if (ir_dev->props && ir_dev->props->scanmask) scancode &= ir_dev->props->scanmask; - } /* First check if we already have a mapping for this ir command */ for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].scancode == scancode) + return i; + /* Keytable is sorted from lowest to highest scancode */ - if (rc_tab->scan[i].scancode > scancode) + if (rc_tab->scan[i].scancode >= scancode) break; - else if (rc_tab->scan[i].scancode < scancode) - continue; - - old_keycode = rc_tab->scan[i].keycode; - rc_tab->scan[i].keycode = keycode; - - /* Did the user wish to remove the mapping? */ - if (keycode == KEY_RESERVED || keycode == KEY_UNKNOWN) { - IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", - i, scancode); - rc_tab->len--; - memmove(&rc_tab->scan[i], &rc_tab->scan[i + 1], - (rc_tab->len - i) * sizeof(struct ir_scancode)); - } - - /* Possibly shrink the keytable, failure is not a problem */ - ir_resize_table(rc_tab); - break; } - if (old_keycode == KEY_RESERVED && keycode != KEY_RESERVED) { - /* No previous mapping found, we might need to grow the table */ - if (resize && ir_resize_table(rc_tab)) - return -ENOMEM; - - IR_dprintk(1, "#%d: New scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); + /* No previous mapping found, we might need to grow the table */ + if (rc_tab->size == rc_tab->len) { + if (!resize || ir_resize_table(rc_tab, GFP_ATOMIC)) + return -1U; + } - /* i is the proper index to insert our new keycode */ + /* i is the proper index to insert our new keycode */ + if (i < rc_tab->len) memmove(&rc_tab->scan[i + 1], &rc_tab->scan[i], (rc_tab->len - i) * sizeof(struct ir_scancode)); - rc_tab->scan[i].scancode = scancode; - rc_tab->scan[i].keycode = keycode; - rc_tab->len++; - set_bit(keycode, dev->keybit); - } else { - IR_dprintk(1, "#%d: Replacing scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); - /* A previous mapping was updated... */ - clear_bit(old_keycode, dev->keybit); - /* ...but another scancode might use the same keycode */ - for (i = 0; i < rc_tab->len; i++) { - if (rc_tab->scan[i].keycode == old_keycode) { - set_bit(old_keycode, dev->keybit); - break; - } - } - } + rc_tab->scan[i].scancode = scancode; + rc_tab->scan[i].keycode = KEY_RESERVED; + rc_tab->len++; - return 0; + return i; } /** @@ -171,17 +234,41 @@ static int ir_do_setkeycode(struct input_dev *dev, * This routine is used to handle evdev EVIOCSKEY ioctl. 
*/ static int ir_setkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int rc; - unsigned long flags; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int index; + unsigned int scancode; + int retval; + unsigned long flags; spin_lock_irqsave(&rc_tab->lock, flags); - rc = ir_do_setkeycode(dev, rc_tab, scancode, keycode, true); + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + if (index >= rc_tab->len) { + retval = -EINVAL; + goto out; + } + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_establish_scancode(ir_dev, rc_tab, scancode, true); + if (index >= rc_tab->len) { + retval = -ENOMEM; + goto out; + } + } + + *old_keycode = ir_update_mapping(dev, rc_tab, index, ke->keycode); + +out: spin_unlock_irqrestore(&rc_tab->lock, flags); - return rc; + return retval; } /** @@ -189,31 +276,72 @@ static int ir_setkeycode, * @dev: the struct input_dev device descriptor * @to: the struct ir_scancode_table to copy entries to * @from: the struct ir_scancode_table to copy entries from - * @return: -EINVAL if all keycodes could not be inserted, otherwise zero. + * @return: -ENOMEM if not all keycodes could be inserted, otherwise zero. * * This routine is used to handle table initialization. */ -static int ir_setkeytable(struct input_dev *dev, - struct ir_scancode_table *to, +static int ir_setkeytable(struct ir_input_dev *ir_dev, const struct ir_scancode_table *from) { - struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; - unsigned long flags; - unsigned int i; - int rc = 0; + unsigned int i, index; + int rc; + + rc = ir_create_table(&ir_dev->rc_tab, + from->name, from->ir_type, from->size); + if (rc) + return rc; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); - spin_lock_irqsave(&rc_tab->lock, flags); for (i = 0; i < from->size; i++) { - rc = ir_do_setkeycode(dev, to, from->scan[i].scancode, - from->scan[i].keycode, false); - if (rc) + index = ir_establish_scancode(ir_dev, rc_tab, + from->scan[i].scancode, false); + if (index >= rc_tab->len) { + rc = -ENOMEM; break; + } + + ir_update_mapping(ir_dev->input_dev, rc_tab, index, + from->scan[i].keycode); } - spin_unlock_irqrestore(&rc_tab->lock, flags); + + if (rc) + ir_free_table(rc_tab); + return rc; } +/** + * ir_lookup_by_scancode() - locate mapping by scancode + * @rc_tab: the &struct ir_scancode_table to search + * @scancode: scancode to look for in the table + * @return: index in the table, -1U if not found + * + * This routine performs a binary search in the RC keymap table for + * the given scancode. + */ +static unsigned int ir_lookup_by_scancode(const struct ir_scancode_table *rc_tab, + unsigned int scancode) +{ + unsigned int start = 0; + unsigned int end = rc_tab->len - 1; + unsigned int mid; + + while (start <= end) { + mid = (start + end) / 2; + if (rc_tab->scan[mid].scancode < scancode) + start = mid + 1; + else if (rc_tab->scan[mid].scancode > scancode) + end = mid - 1; + else + return mid; + } + + return -1U; +} + /** * ir_getkeycode() - get a keycode from the scancode->keycode table * @dev: the struct input_dev device descriptor @@ -224,36 +352,46 @@ static int ir_setkeytable(struct input_dev *dev, * This routine is used to handle the evdev EVIOCGKEYCODE ioctl.
*/ static int ir_getkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) + struct input_keymap_entry *ke) { - int start, end, mid; - unsigned long flags; - int key = KEY_RESERVED; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + struct ir_scancode *entry; + unsigned long flags; + unsigned int index; + unsigned int scancode; + int retval; spin_lock_irqsave(&rc_tab->lock, flags); - start = 0; - end = rc_tab->len - 1; - while (start <= end) { - mid = (start + end) / 2; - if (rc_tab->scan[mid].scancode < scancode) - start = mid + 1; - else if (rc_tab->scan[mid].scancode > scancode) - end = mid - 1; - else { - key = rc_tab->scan[mid].keycode; - break; - } + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_lookup_by_scancode(rc_tab, scancode); + } + + if (index >= rc_tab->len) { + if (!(ke->flags & INPUT_KEYMAP_BY_INDEX)) + IR_dprintk(1, "unknown key for scancode 0x%04x\n", + scancode); + retval = -EINVAL; + goto out; } - spin_unlock_irqrestore(&rc_tab->lock, flags); - if (key == KEY_RESERVED) - IR_dprintk(1, "unknown key for scancode 0x%04x\n", - scancode); + entry = &rc_tab->scan[index]; - *keycode = key; - return 0; + ke->index = index; + ke->keycode = entry->keycode; + ke->len = sizeof(entry->scancode); + memcpy(ke->scancode, &entry->scancode, sizeof(entry->scancode)); + +out: + spin_unlock_irqrestore(&rc_tab->lock, flags); + return retval; } /** @@ -268,12 +406,24 @@ static int ir_getkeycode(struct input_dev *dev, */ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) { - int keycode; + struct ir_input_dev *ir_dev = input_get_drvdata(dev); + struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int keycode; + unsigned int index; + unsigned long flags; + + spin_lock_irqsave(&rc_tab->lock, flags); + + index = ir_lookup_by_scancode(rc_tab, scancode); + keycode = index < rc_tab->len ? 
+ rc_tab->scan[index].keycode : KEY_RESERVED; + + spin_unlock_irqrestore(&rc_tab->lock, flags); - ir_getkeycode(dev, scancode, &keycode); if (keycode != KEY_RESERVED) IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", dev->name, scancode, keycode); + return keycode; } EXPORT_SYMBOL_GPL(ir_g_keycode_from_table); @@ -453,8 +603,8 @@ int __ir_input_register(struct input_dev *input_dev, goto out_dev; } - input_dev->getkeycode = ir_getkeycode; - input_dev->setkeycode = ir_setkeycode; + input_dev->getkeycode_new = ir_getkeycode; + input_dev->setkeycode_new = ir_setkeycode; input_set_drvdata(input_dev, ir_dev); ir_dev->input_dev = input_dev; @@ -462,12 +612,6 @@ int __ir_input_register(struct input_dev *input_dev, spin_lock_init(&ir_dev->keylock); setup_timer(&ir_dev->timer_keyup, ir_timer_keyup, (unsigned long)ir_dev); - ir_dev->rc_tab.name = rc_tab->name; - ir_dev->rc_tab.ir_type = rc_tab->ir_type; - ir_dev->rc_tab.alloc = roundup_pow_of_two(rc_tab->size * - sizeof(struct ir_scancode)); - ir_dev->rc_tab.scan = kmalloc(ir_dev->rc_tab.alloc, GFP_KERNEL); - ir_dev->rc_tab.size = ir_dev->rc_tab.alloc / sizeof(struct ir_scancode); if (props) { ir_dev->props = props; if (props->open) @@ -476,23 +620,14 @@ int __ir_input_register(struct input_dev *input_dev, input_dev->close = ir_close; } - if (!ir_dev->rc_tab.scan) { - rc = -ENOMEM; - goto out_name; - } - - IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", - ir_dev->rc_tab.size, ir_dev->rc_tab.alloc); - set_bit(EV_KEY, input_dev->evbit); set_bit(EV_REP, input_dev->evbit); set_bit(EV_MSC, input_dev->evbit); set_bit(MSC_SCAN, input_dev->mscbit); - if (ir_setkeytable(input_dev, &ir_dev->rc_tab, rc_tab)) { - rc = -ENOMEM; - goto out_table; - } + rc = ir_setkeytable(ir_dev, rc_tab); + if (rc) + goto out_name; rc = ir_register_class(input_dev); if (rc < 0) @@ -515,7 +650,7 @@ int __ir_input_register(struct input_dev *input_dev, out_event: ir_unregister_class(input_dev); out_table: - kfree(ir_dev->rc_tab.scan); + ir_free_table(&ir_dev->rc_tab); out_name: kfree(ir_dev->driver_name); out_dev: @@ -533,7 +668,6 @@ EXPORT_SYMBOL_GPL(__ir_input_register); void ir_input_unregister(struct input_dev *input_dev) { struct ir_input_dev *ir_dev = input_get_drvdata(input_dev); - struct ir_scancode_table *rc_tab; if (!ir_dev) return; @@ -545,10 +679,7 @@ void ir_input_unregister(struct input_dev *input_dev) if (ir_dev->props->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(input_dev); - rc_tab = &ir_dev->rc_tab; - rc_tab->size = 0; - kfree(rc_tab->scan); - rc_tab->scan = NULL; + ir_free_table(&ir_dev->rc_tab); ir_unregister_class(input_dev); diff --git a/include/media/rc-map.h b/include/media/rc-map.h index a9c041d49662..9b201ec8dc7f 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -35,7 +35,7 @@ struct ir_scancode_table { unsigned int len; /* Used number of entries */ unsigned int alloc; /* Size of *scan in bytes */ u64 ir_type; - char *name; + const char *name; spinlock_t lock; }; -- cgit v1.2.3 From db7829c6cc32f3c0c9a324118d743acb1abff081 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: x86, percpu: Optimize this_cpu_ptr Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. 
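A minimal usage sketch, assuming a hypothetical per-cpu counter; the patch only changes how the pointer is computed, not how callers use it:

/*
 * Hypothetical example (struct pkt_stats and pkt_stats are made-up
 * names). With the x86 __this_cpu_ptr() above, computing "s" costs a
 * single "add %gs:this_cpu_off, %rax". The caller must stay on one
 * CPU for the duration of the access, e.g. with preemption disabled.
 */
#include <linux/percpu.h>

struct pkt_stats {
	unsigned long packets;
};

static DEFINE_PER_CPU(struct pkt_stats, pkt_stats);

static void count_packet(void)
{
	struct pkt_stats *s = __this_cpu_ptr(&pkt_stats);

	s->packets++;
}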
tj: * Beefed up the comment a bit and renamed in-macro temp variable to match neighboring macros. * Folded fix for const pointer case found in linux-next. * Fixed sparse notation. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 14 ++++++++++++++ include/asm-generic/percpu.h | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index cd28f9ad910d..f899e01a8ac9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -47,6 +47,20 @@ #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) + +/* + * Compared to the generic __my_cpu_offset version, the following + * saves one instruction and avoids clobbering a temp register. + */ +#define __this_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + __verify_pcpu_ptr(ptr); \ + asm volatile("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +}) #else #define __percpu_arg(x) "%P" #x #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 08923b684768..ec643115bb56 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); -- cgit v1.2.3 From 677243d7494d09bfa782425f063a6013de53c35b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: percpu: Optimize __get_cpu_var() Redefine __get_cpu_var() using this_cpu_ptr() which can be arch-optimized. 
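In effect the lvalue accessor becomes sugar over this_cpu_ptr(). A short sketch of the equivalence, using a hypothetical per-cpu variable and assuming preemption is disabled around the accesses:

/*
 * Illustration only: after this patch __get_cpu_var(var) expands to
 * (*this_cpu_ptr(&(var))), so the two statements below compile to the
 * same per-cpu access.
 */
static DEFINE_PER_CPU(int, nr_events);

static void bump_events(void)
{
	__get_cpu_var(nr_events)++;		/* lvalue form */
	(*this_cpu_ptr(&nr_events))++;		/* its expansion */
}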
Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ec643115bb56..d17784ea37ff 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) #endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -- cgit v1.2.3 From 0da2f50944976e890ccc9436ab88c0da87788d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: ide: remove unnecessary blk_queue_flushing() test in do_ide_request() Unplugging from a request function doesn't really help much (it's already in the request_fn) and soon block layer will be updated to mix barrier sequence with other commands, so there's no need to treat queue flushing any differently. ide was the only user of blk_queue_flushing(). Remove it. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- drivers/ide/ide-io.c | 13 ------------- include/linux/blkdev.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a381be814070..999dac054bcc 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q) struct request *rq = NULL; ide_startstop_t startstop; - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. 
- * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(q)) - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); /* HLD do_request() callback might sleep, make sure it's okay */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..015375c7d031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -521,7 +521,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -- cgit v1.2.3 From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 +++++++---------------------------- drivers/block/brd.c | 2 +- drivers/block/virtio_blk.c | 9 --------- drivers/block/xen-blkfront.c | 8 +++----- drivers/scsi/sd.c | 4 +--- include/linux/blkdev.h | 17 +---------------- 6 files changed, 13 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f0faefca032f..c807e9ca3a68 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered) if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { + ordered != QUEUE_ORDERED_DRAIN_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } @@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!blk_rq_sectors(rq)) { + if (!blk_rq_sectors(rq)) q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. 
- */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } /* stash away the original request */ blk_dequeue_request(rq); @@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) + if (queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } + /* Ordered by draining. Wait for turn. */ + WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); + if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) + *rqp = NULL; return true; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 1c7f63792ff8..47a41272d26b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,7 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG); + blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2aafafca2b13..79652809eee8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -395,15 +395,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) * to implement write barrier support. */ blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) { - /* - * If the BARRIER feature is supported the host expects us - * to order request by tags. This implies there is not - * volatile write cache on the host, and that the host - * never re-orders outstanding I/O. This feature is not - * useful for real life scenarious and deprecated. - */ - blk_queue_ordered(q, QUEUE_ORDERED_TAG); } else { /* * If the FLUSH feature is not supported we must assume that diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..50ec6f834996 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -424,8 +424,7 @@ static int xlvbd_barrier(struct blkfront_info *info) const char *barrier; switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; - case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; + case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; case QUEUE_ORDERED_NONE: barrier = "disabled"; break; default: return -EINVAL; } @@ -1078,8 +1077,7 @@ static void blkfront_connect(struct blkfront_info *info) * we're dealing with a very old backend which writes * synchronously; draining will do what needs to get done. * - * If there are barriers, then we can do full queued writes - * with tagged barriers. + * If there are barriers, then we use flush. * * If barriers are not supported, then there's no much we can * do, so just set ordering to NONE. 
@@ -1087,7 +1085,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = QUEUE_ORDERED_DRAIN; else if (barrier) - info->feature_barrier = QUEUE_ORDERED_TAG; + info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; else info->feature_barrier = QUEUE_ORDERED_NONE; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2714becc2eaf..cdfc51ab9cf2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2151,9 +2151,7 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. Note that as the current SCSI - * dispatch function can alter request order, we cannot use - * QUEUE_ORDERED_TAG_* even when ordered tag is supported. + * with ordered requests. */ if (sdkp->WCE) ordered = sdkp->DPOFUA diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 015375c7d031..7077bc0d6138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,12 +470,7 @@ enum { * DRAIN : ordering by draining is enough * DRAIN_FLUSH : ordering by draining w/ pre and post flushes * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, @@ -483,8 +478,7 @@ enum { QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -492,15 +486,6 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - /* * Ordered operation sequence */ -- cgit v1.2.3 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with the simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA. If a device has a write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. 
Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- block/blk-barrier.c | 29 ----------------------------- block/blk-core.c | 6 ++++-- block/blk-settings.c | 20 ++++++++++++++++++++ drivers/block/brd.c | 1 - drivers/block/loop.c | 2 +- drivers/block/osdblk.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/virtio_blk.c | 25 +++++++++---------------- drivers/block/xen-blkfront.c | 43 ++++++++++++------------------------------- drivers/ide/ide-disk.c | 13 ++++++------- drivers/md/dm.c | 2 +- drivers/mmc/card/queue.c | 1 - drivers/s390/block/dasd.c | 1 - drivers/scsi/sd.c | 16 ++++++++-------- include/linux/blkdev.h | 6 ++++-- 15 files changed, 67 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c807e9ca3a68..ed0aba5463ab 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,35 +9,6 @@ #include "blk.h" -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. - * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - /* * Cache flushing for ordered writes handling */ diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7e63cc..f06354183b29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio) const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..9b18afcfe925 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. 
+ */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 47a41272d26b..fa33f97722ba 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c3a4a2e176da..953d1e12f4d4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -832,7 +832,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(lo->lo_queue, REQ_FLUSH); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f05c62..72d62462433d 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -439,7 +439,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(q, REQ_FLUSH); disk->queue = q; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..4911f9e57bc7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(queue, REQ_FLUSH); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 79652809eee8..d10b635b3946 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -388,22 +388,15 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->disk->driverfs_dev = &vdev->dev; index++; - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { - /* - * If the FLUSH feature is supported we do have support for - * flushing a volatile write cache on the host. Use that - * to implement write barrier support. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else { - /* - * If the FLUSH feature is not supported we must assume that - * the host does not perform any kind of volatile write - * caching. We still need to drain the queue to provider - * proper barrier semantics. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); - } + /* + * If the FLUSH feature is supported we do have support for + * flushing a volatile write cache on the host. Use that to + * implement write barrier support; otherwise, we must assume + * that the host does not perform any kind of volatile write + * caching. 
+ */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 50ec6f834996..0b1eea643262 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,7 +95,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; - int feature_barrier; + unsigned int feature_flush; int is_ready; }; @@ -418,25 +418,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) } -static int xlvbd_barrier(struct blkfront_info *info) +static void xlvbd_flush(struct blkfront_info *info) { - int err; - const char *barrier; - - switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; - case QUEUE_ORDERED_NONE: barrier = "disabled"; break; - default: return -EINVAL; - } - - err = blk_queue_ordered(info->rq, info->feature_barrier); - - if (err) - return err; - + blk_queue_flush(info->rq, info->feature_flush); printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, barrier); - return 0; + info->gd->disk_name, + info->feature_flush ? "enabled" : "disabled"); } @@ -515,7 +502,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - xlvbd_barrier(info); + xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -661,8 +648,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); + info->feature_flush = 0; + xlvbd_flush(info); } /* fall through */ case BLKIF_OP_READ: @@ -1075,19 +1062,13 @@ static void blkfront_connect(struct blkfront_info *info) /* * If there's no "feature-barrier" defined, then it means * we're dealing with a very old backend which writes - * synchronously; draining will do what needs to get done. + * synchronously; nothing to do. * * If there are barriers, then we use flush. - * - * If barriers are not supported, then there's no much we can - * do, so just set ordering to NONE. */ - if (err) - info->feature_barrier = QUEUE_ORDERED_DRAIN; - else if (barrier) - info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; - else - info->feature_barrier = QUEUE_ORDERED_NONE; + info->feature_flush = 0; + if (!err && barrier) + info->feature_flush = REQ_FLUSH; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07de30e..7c5b01ce51d2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) return ide_no_data_taskfile(drive, &cmd); } -static void update_ordered(ide_drive_t *drive) +static void update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned ordered = QUEUE_ORDERED_NONE; + unsigned flush = 0; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) drive->name, barrier ? 
"" : "not "); if (barrier) { - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + flush = REQ_FLUSH; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } - } else - ordered = QUEUE_ORDERED_DRAIN; + } - blk_queue_ordered(drive->queue, ordered); + blk_queue_flush(drive->queue, flush); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); @@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) } } - update_ordered(drive); + update_flush(drive); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ac384b2a6a33..b1d92be8f990 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2245,7 +2245,7 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(md->queue, REQ_FLUSH); elv_register_queue(md->queue); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678176be..9c0b42bfe089 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); if (mmc_can_erase(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8373ca0de8e0..9b106d83b0cd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2197,7 +2197,6 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cdfc51ab9cf2..63bd01ae534f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; - unsigned ordered; + unsigned flush = 0; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2151,15 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. + * with flush requests. */ - if (sdkp->WCE) - ordered = sdkp->DPOFUA - ? 
QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; - else - ordered = QUEUE_ORDERED_DRAIN; + if (sdkp->WCE) { + flush |= REQ_FLUSH; + if (sdkp->DPOFUA) + flush |= REQ_FUA; + } - blk_queue_ordered(sdkp->disk->queue, ordered); + blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); kfree(buffer); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d6138..e97911d4dec3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From 9cbbdca44ae1a6f512ea1e2be11ced8bbb9d430a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: remove spurious uses of REQ_HARDBARRIER REQ_HARDBARRIER is deprecated. Remove spurious uses in the following users. Please note that other than osdblk, all other uses were already spurious before deprecation. * osdblk: osdblk_rq_fn() won't receive any request with REQ_HARDBARRIER set. Remove the test for it. * pktcdvd: use of REQ_HARDBARRIER in pkt_generic_packet() doesn't mean anything. Removed. * aic7xxx_old: Setting MSG_ORDERED_Q_TAG on REQ_HARDBARRIER is spurious. Removed. * sas_scsi_host: Setting TASK_ATTR_ORDERED on REQ_HARDBARRIER is spurious. Removed. * scsi_tcq: The ordered tag path wasn't being used anyway. Removed. 
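For reference, the scsi_tcq.h helper touched below reduces to the following; this is reconstructed from the hunk, and the untagged tail returning 0 is unchanged context assumed from mainline:

static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
{
	struct request *req = cmd->request;

	if (blk_rq_tagged(req)) {
		/* ordered tags are gone; tagged commands are always simple */
		*msg++ = MSG_SIMPLE_TAG;
		*msg++ = req->tag;
		return 2;
	}

	return 0;	/* untagged path, assumed unchanged */
}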
Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: James Bottomley Cc: Peter Osterlund Signed-off-by: Jens Axboe --- drivers/block/osdblk.c | 3 +-- drivers/block/pktcdvd.c | 1 - drivers/scsi/aic7xxx_old.c | 21 ++------------------- drivers/scsi/libsas/sas_scsi_host.c | 13 +------------ include/scsi/scsi_tcq.h | 6 +----- 5 files changed, 5 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 72d62462433d..87311ebac0db 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q) break; /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_HARDBARRIER)) { + if (rq->cmd_type != REQ_TYPE_FS) { blk_end_request_all(rq, 0); continue; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb59bb76..0166ea136045 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) rq->cmd_flags |= REQ_QUIET; diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 93984c9dfe14..e1cd6062776c 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) - { - aic_dev->barrier_total++; - if(scb->tag_action == MSG_ORDERED_Q_TAG) - aic_dev->ordered_total++; - } x = scb->sg_length; x >>= 10; for(i=0; i<6; i++) @@ -10144,19 +10138,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) { - if (req->cmd_flags & REQ_HARDBARRIER) - { - if(sdptr->ordered_tags) - { - hscb->control |= MSG_ORDERED_Q_TAG; - scb->tag_action = MSG_ORDERED_Q_TAG; - } - } - else - { - hscb->control |= MSG_SIMPLE_Q_TAG; - scb->tag_action = MSG_SIMPLE_Q_TAG; - } + hscb->control |= MSG_SIMPLE_Q_TAG; + scb->tag_action = MSG_SIMPLE_Q_TAG; } } if ( !(aic_dev->dtr_pending) && diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index f0cfba9a1fc8..535085cd27ec 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task) sc->scsi_done(sc); } -static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) -{ - enum task_attribute ta = TASK_ATTR_SIMPLE; - if (cmd->request && blk_rq_tagged(cmd->request)) { - if (cmd->device->ordered_tags && - (cmd->request->cmd_flags & REQ_HARDBARRIER)) - ta = TASK_ATTR_ORDERED; - } - return ta; -} - static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, struct domain_device *dev, gfp_t gfp_flags) @@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, task->ssp_task.retry_count = 1; int_to_scsilun(cmd->device->lun, &lun); memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); - task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + task->ssp_task.task_attr = TASK_ATTR_SIMPLE; memcpy(task->ssp_task.cdb, cmd->cmnd, 16); task->scatter = scsi_sglist(cmd); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb37..d6e7994aa634 100644 --- 
a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } -- cgit v1.2.3 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-barrier.c | 32 ++++++++++++++------------------ block/blk-core.c | 21 ++++----------------- block/blk.h | 7 +++++-- include/linux/blkdev.h | 1 - 4 files changed, 23 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index ed0aba5463ab..f1be85ba2bb5 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline bool start_ordered(struct request_queue *q, struct request **rqp) +static inline struct request *start_ordered(struct request_queue *q, + struct request *rq) { - struct request *rq = *rqp; unsigned skip = 0; q->orderr = 0; @@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; + init_request_from_bio(rq, q->orig_bar_rq->bio); if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) else skip |= QUEUE_ORDSEQ_DRAIN; - *rqp = rq; - /* * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. + * return %NULL to tell elevator that this request is gone. 
*/ - return !blk_ordered_complete_seq(q, skip, 0); + if (blk_ordered_complete_seq(q, skip, 0)) + rq = NULL; + return rq; } -bool blk_do_ordered(struct request_queue *q, struct request **rqp) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - struct request *rq = *rqp; const int is_barrier = rq->cmd_type == REQ_TYPE_FS && (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) - return true; + return rq; if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); + return start_ordered(q, rq); else { /* * Queue ordering not supported. Terminate @@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; + return NULL; } } @@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; + return rq; /* Ordered by draining. Wait for turn. */ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; + rq = ERR_PTR(-EAGAIN); - return true; + return rq; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f06354183b29..f8d37a8e2c55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. - */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1316,7 +1300,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..874eb4ea8093 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; + rq = blk_do_ordered(q, rq); + if (rq) + return !IS_ERR(rq) ? 
rq : NULL; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec3..996549d71923 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by the block layer. This patch drops barrier ordering by queue draining from the block layer. The ordering-by-draining implementation was somewhat invasive to request handling. A list of notable changes follows. * Each queue has a 1-bit color which is flipped on each barrier issue. This is used to track whether a given request was issued before the current barrier or not. The REQ_ORDERED_COLOR flag and the coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_ordered_cur_seq(). This logic is removed. * The draining completion logic in elv_completed_request() is removed. * All barrier sequence requests were queued to the request queue and then trickled to the lower layer according to progress, and thus maintaining request order during requeues was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete, from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to the dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). 
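To make the resulting flow concrete, here is a condensed editorial sketch of the dispatch-time decision after this patch. It mirrors the rewritten blk_do_ordered() in the diff below; sketch_do_ordered() and start_new_sequence() are hypothetical names (the real sequence-starting logic is open-coded in blk_do_ordered()).

	/* Condensed sketch, not part of the patch: how a request is
	 * classified at dispatch time after this change. */
	struct request *sketch_do_ordered(struct request_queue *q,
					  struct request *rq)
	{
		/* non-barrier requests pass straight through */
		if (!(rq->cmd_flags & REQ_HARDBARRIER))
			return rq;

		/* one barrier sequence at a time; park later arrivals */
		if (q->ordseq) {
			list_move_tail(&rq->queuelist, &q->pending_barriers);
			return NULL;
		}

		/* otherwise start PREFLUSH -> BAR -> POSTFLUSH sequencing */
		return start_new_sequence(q, rq);	/* hypothetical helper */
	}
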
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 220 ++++++++++++++++++---------------------------- block/blk-core.c | 11 ++- block/blk.h | 2 +- block/elevator.c | 79 ++--------------- include/linux/blk_types.h | 2 - include/linux/blkdev.h | 19 ++-- 6 files changed, 113 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f1be85ba2bb5..e8b2e5c091b1 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,6 +9,8 @@ #include "blk.h" +static struct request *queue_next_ordseq(struct request_queue *q); + /* * Cache flushing for ordered writes handling */ @@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q) return 1 << ffz(q->ordseq); } -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) { - struct request *rq; + struct request *next_rq = NULL; if (error && !q->orderr) q->orderr = error; @@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) BUG_ON(q->ordseq & seq); q->ordseq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. 
- */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; } static void pre_flush_end_io(struct request *rq, int error) @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error) blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, unsigned which) +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) { - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; + rq->cmd_flags = REQ_FLUSH; rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static struct request *queue_next_ordseq(struct request_queue *q) { - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; + struct request *rq = &q->bar_rq; - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + case QUEUE_ORDSEQ_BAR: /* initialize proxy request and queue it */ blk_rq_init(q, rq); init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; + break; - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; - if (queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return %NULL to tell elevator that this request is gone. 
- */ - if (blk_ordered_complete_seq(q, skip, 0)) - rq = NULL; + default: + BUG(); + } return rq; } struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return rq; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rq); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; } /* - * Ordered sequence in progress + * Start a new ordered sequence */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return rq; + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - rq = ERR_PTR(-EAGAIN); + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; - return rq; + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f8d37a8e2c55..d316662682c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_barriers); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) const bool sync = (bio->bi_rw & REQ_SYNC); const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; /* REQ_HARDBARRIER is no more */ @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & REQ_HARDBARRIER) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1303,7 +1310,7 @@ get_rq: /* insert the request 
into the elevator */ drive_stat_acct(req, 1); - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 874eb4ea8093..08081e4b294e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq = blk_do_ordered(q, rq); if (rq) - return !IS_ERR(rq) ? rq : NULL; + return rq; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..241c69c45c5f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. 
- */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c9715..9192282b4259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -143,7 +143,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -184,7 +183,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d71923..20a3710a481b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are 
misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++----- block/blk-flush.c | 98 +++++++++++++++++++++++++------------------------- block/blk.h | 4 +-- include/linux/blkdev.h | 24 ++++++------- 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index d316662682c8..8870ae40179d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); - INIT_LIST_HEAD(&q->pending_barriers); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. 
*/ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk-flush.c b/block/blk-flush.c index e8b2e5c091b1..dd873225da97 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -9,41 +9,38 @@ #include "blk.h" -static struct request *queue_next_ordseq(struct request_queue *q); +static struct request *queue_next_fseq(struct request_queue *q); -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_flush_cur_seq(struct request_queue *q) { - if (!q->ordseq) + if (!q->flush_seq) return 0; - return 1 << ffz(q->ordseq); + return 1 << ffz(q->flush_seq); } -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) { struct request *next_rq = NULL; - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); list_move(&next_rq->queuelist, &q->queue_head); } } @@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q, static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); } -static void bar_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void queue_flush(struct request_queue *q, struct request *rq, @@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq, blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->rq_disk = q->orig_flush_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static struct request *queue_next_ordseq(struct request_queue *q) +static struct request *queue_next_fseq(struct request_queue *q) { - struct request *rq = &q->bar_rq; + struct request *rq = 
&q->flush_rq; - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: queue_flush(q, rq, pre_flush_end_io); break; - case QUEUE_ORDSEQ_BAR: + case QUEUE_FSEQ_DATA: /* initialize proxy request and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); + init_request_from_bio(rq, q->orig_flush_rq->bio); rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; + rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_ORDSEQ_POSTFLUSH: + case QUEUE_FSEQ_POSTFLUSH: queue_flush(q, rq, post_flush_end_io); break; @@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q) return rq; } -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned skip = 0; if (!(rq->cmd_flags & REQ_HARDBARRIER)) return rq; - if (q->ordseq) { + if (q->flush_seq) { /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. + * Sequenced flush is already in progress and they + * can't be processed in parallel. Queue for later + * processing. */ - list_move_tail(&rq->queuelist, &q->pending_barriers); + list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } @@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) } /* - * Start a new ordered sequence + * Start a new flush sequence */ - q->orderr = 0; + q->flush_err = 0; q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; + q->flush_seq |= QUEUE_FSEQ_STARTED; /* * For an empty barrier, there's no actual BAR request, which @@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) /* stash away the original request */ blk_dequeue_request(rq); - q->orig_bar_rq = rq; + q->orig_flush_rq = rq; if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_FSEQ_PREFLUSH; if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; + skip |= QUEUE_FSEQ_DATA; if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; + skip |= QUEUE_FSEQ_POSTFLUSH; /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); + return blk_flush_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk.h b/block/blk.h index 08081e4b294e..24b92bd78f37 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -struct request *blk_do_ordered(struct request_queue *q, struct request *rq); +struct request *blk_do_flush(struct request_queue *q, struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { @@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - rq = blk_do_ordered(q, rq); + rq = blk_do_flush(q, rq); if (rq) return rq; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481b..1cd83ec077db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned 
int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. The block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or REQ_FUA flags that they don't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply turns them into no-ops. IOW, it no longer distinguishes between a writeback cache that doesn't support cache flush and a writethrough/no-cache device. Devices which have a WB cache w/o flush are very difficult to come by these days, and there's not much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing, but some drivers assume that zero-length requests don't have an rq->bio, which isn't true for these requests; this requires the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. 
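As a rough usage sketch (editorial, not from the patch): with the new flags a filesystem commit write becomes just a tagged buffer write, and the block layer does whatever sequencing the device needs. commit_write() is a hypothetical helper built on the existing buffer-head API.

	/* Hypothetical journal-commit helper.  WRITE_FLUSH_FUA adds
	 * REQ_FLUSH | REQ_FUA to a synchronous write; the block layer
	 * expands it into preflush + data write (+ postflush) as the
	 * device requires and never fails it with -EOPNOTSUPP. */
	static int commit_write(struct buffer_head *bh)
	{
		lock_buffer(bh);
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE_FLUSH_FUA, bh);
		wait_on_buffer(bh);
		return buffer_uptodate(bh) ? 0 : -EIO;
	}
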
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++--------------------- block/blk.h | 3 ++ include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 ++++++---- 7 files changed, 67 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes. */ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. 
If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. 
- * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. 
* */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags, and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK, which specifies the flags to be copied from bio to request, already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- include/linux/blk_types.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 495bdc4a23da..2a5b19204546 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 179799479e6f..36edadf5b41a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,6 +168,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 2cf6d26a354ab6362e301b5a323832b02867df47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:10 -0400 Subject: block: pass gfp_mask and flags to sb_issue_discard We'll need to get rid of the BLKDEV_IFL_BARRIER flag, and to facilitate that, and to make the interface less confusing, pass all flags explicitly. 
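A call-site sketch of the widened interface (editorial; the flag values mirror the ext4 conversion in the diff below, and example_issue_discard() is a hypothetical wrapper):

	/* Example caller after this change: the GFP mask and the
	 * wait/barrier behaviour are now chosen by the filesystem
	 * rather than baked into sb_issue_discard() itself. */
	static void example_issue_discard(struct super_block *sb,
					  sector_t block, sector_t nr_blocks)
	{
		int err = sb_issue_discard(sb, block, nr_blocks, GFP_NOFS,
					   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
		if (err)
			printk(KERN_WARNING "discard failed: %d\n", err);
	}
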
Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/ext4/mballoc.c | 3 ++- fs/fat/fatent.c | 4 +++- include/linux/blkdev.h | 11 +++++------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..df44b345f662 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2566,7 +2566,8 @@ static inline void ext4_issue_discard(struct super_block *sb, discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (ret == EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3b75a3..3a56a82f5658 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -577,7 +577,9 @@ int fat_free_clusters(struct inode *inode, int cluster) sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), - nr_clus * sbi->sec_per_clus); + nr_clus * sbi->sec_per_clus, + GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); first_cl = cluster; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ef705f800ab..6b305eb4a343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -881,13 +881,12 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 31725e65c7214b52b607eba705fc4f306be4d5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:21 -0400 Subject: block: remove the WRITE_BARRIER flag It's unused now. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 352c48627381..d6add69bc170 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,7 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on * non-volatile media on completion. 
@@ -157,8 +156,6 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ -- cgit v1.2.3 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-lib.c | 18 ++---------------- include/linux/blkdev.h | 2 -- include/linux/fs.h | 8 -------- 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029a104e..fe2e6ed0f510 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? - DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (flags & BLKDEV_IFL_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -199,13 +192,6 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. 
- */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - if (flags & BLKDEV_IFL_WAIT) /* Wait for bios in-flight */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a343..cfcb3a610605 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6add69bc170..6b0f6e9993a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -163,14 +163,6 @@ struct inodes_stat_t { #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH | REQ_FUA) -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) - #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 -- cgit v1.2.3 From 0edd55faea7c8081bc826234b917501738a6218f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:23 -0400 Subject: block: remove the BH_Eopnotsupp flag This flag was only set for barrier buffers, which we don't submit anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/buffer.c | 7 +------ fs/fat/misc.c | 5 +---- include/linux/buffer_head.h | 2 -- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..7f0b9b083f77 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - set_bit(BH_Eopnotsupp, &bh->b_state); } if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) @@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) bh->b_end_io = end_buffer_write_sync; ret = submit_bh(rw, bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - ret = -EOPNOTSUPP; - } if (!ret && !buffer_uptodate(bh)) ret = -EIO; } else { diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); - if (buffer_eopnotsupp(bhs[i])) { - clear_buffer_eopnotsupp(bhs[i]); - err = -EOPNOTSUPP; - } else if (!err && !buffer_uptodate(bhs[i])) + if (!err && !buffer_uptodate(bhs[i])) err = -EIO; } return err; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index fc999f583fda..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ 
b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -- cgit v1.2.3 From 30ca22c70e3ef0a96ff84de69cd7e8561b416cb2 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Thu, 22 Jul 2010 15:03:41 -0700 Subject: ext3/ext4: Factor out disk addressability check As part of adding support for OCFS2 to mount huge volumes, we need to check that the sector_t and page cache of the system are capable of addressing the entire volume. An identical check already appears in ext3 and ext4. This patch moves the addressability check into its own function in fs/libfs.c and modifies ext3 and ext4 to invoke it. [Edited to -EINVAL instead of BUG_ON() for bad blocksize_bits -- Joel] Signed-off-by: Patrick LoPresti Cc: linux-ext4@vger.kernel.org Acked-by: Andreas Dilger Signed-off-by: Joel Becker --- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 8 +++----- fs/libfs.c | 29 +++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (le32_to_cpu(es->s_blocks_count) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + if (generic_check_addressable(sb->s_blocksize_bits, + le32_to_cpu(es->s_blocks_count))) { ext3_msg(sb, KERN_ERR, "error: filesystem is too large to mount safely"); if (sizeof(sector_t) < 8) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. 
*/ - if ((ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || - (ext4_blocks_count(es) > - (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { + ret = generic_check_addressable(sb->s_blocksize_bits, + ext4_blocks_count(es)); + if (ret) { ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); - ret = -EFBIG; goto failed_mount; } diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..8debe7b33769 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) } EXPORT_SYMBOL(generic_file_fsync); +/** + * generic_check_addressable - Check addressability of file system + * @blocksize_bits: log of file system block size + * @num_blocks: number of blocks in file system + * + * Determine whether a file system with @num_blocks blocks (and a + * block size of 2**@blocksize_bits) is addressable by the sector_t + * and page cache of the system. Return 0 if so and -EFBIG otherwise. + */ +int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) +{ + u64 last_fs_block = num_blocks - 1; + + if (unlikely(num_blocks == 0)) + return 0; + + if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) + return -EINVAL; + + if ((last_fs_block > + (sector_t)(~0ULL) >> (blocksize_bits - 9)) || + (last_fs_block > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - blocksize_bits))) { + return -EFBIG; + } + return 0; +} +EXPORT_SYMBOL(generic_check_addressable); + /* * No-op implementation of ->fsync for in-memory filesystems. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..1a759f40ab9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3 From c8bf1336824ebd698d37b71763e1c43190f2229a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:07:38 +0200 Subject: Consolidate min_not_zero We have several users of min_not_zero, each of them using their own definition. Move the define to kernel.h. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ----- drivers/block/drbd/drbd_receiver.c | 1 - drivers/md/dm-snap.c | 2 -- drivers/md/dm-table.c | 5 ----- include/linux/kernel.h | 10 ++++++++++ 5 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..8d592b559bd3 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -455,11 +455,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? 
l : min(l, r)) - /** * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers * @t: the stacking driver (top) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d3c742..484ecbb6b772 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2972,7 +2972,6 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5974d3094d97..f30f6e8d594e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -706,8 +706,6 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) return 0; } -#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) - /* * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9fc07d7a4b9..90267f8d64ee 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -486,11 +486,6 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, return 0; } -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..f5df2f4acb0d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -640,6 +640,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value -- cgit v1.2.3 From 13f05c8d8e98bbdce89158bfdb2e380940695a88 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:50:10 +0200 Subject: block/scsi: Provide a limit on the number of integrity segments Some controllers have a hardware limit on the number of protection information scatter-gather list segments they can handle. Introduce a max_integrity_segments limit in the block layer and provide a new scsi_host_template setting that allows HBA drivers to provide a value suitable for the hardware. Add support for honoring the integrity segment limit when merging both bios and requests. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-integrity.c | 93 ++++++++++++++++++++++++++++++++++++----------- block/blk-merge.c | 23 +++++++----- block/blk-settings.c | 3 ++ block/blk-sysfs.c | 11 ++++++ block/blk.h | 8 ---- drivers/scsi/hosts.c | 1 + drivers/scsi/scsi_lib.c | 26 +++++++++---- drivers/scsi/scsi_sysfs.c | 2 + include/linux/bio.h | 4 ++ include/linux/blkdev.h | 33 +++++++++++++++-- include/scsi/scsi.h | 6 +++ include/scsi/scsi_host.h | 7 ++++ 12 files changed, 167 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index edce1ef7933d..885cbb59967e 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -32,24 +32,37 @@ static struct kmem_cache *integrity_cachep; /** * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * * Description: Returns the number of elements required in a - * scatterlist corresponding to the integrity metadata in a request. + * scatterlist corresponding to the integrity metadata in a bio. */ -int blk_rq_count_integrity_sg(struct request *rq) +int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + unsigned int segments = 0; + unsigned int seg_size = 0; + unsigned int i = 0; - ivprv = NULL; - segments = 0; + bio_for_each_integrity_vec(iv, bio, i) { - rq_for_each_integrity_segment(iv, rq, iter) { + if (ivprv) { + if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + goto new_segment; + + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (seg_size + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; - if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + seg_size += iv->bv_len; + } else { +new_segment: segments++; + seg_size = iv->bv_len; + } ivprv = iv; } @@ -60,30 +73,34 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); /** * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * @sglist: target scatterlist * * Description: Map the integrity vectors in request into a * scatterlist. The scatterlist must be big enough to hold all * elements. I.e. sized using blk_rq_count_integrity_sg(). 
*/ -int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist) +int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - struct scatterlist *sg; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + struct scatterlist *sg = NULL; + unsigned int segments = 0; + unsigned int i = 0; - ivprv = NULL; - sg = NULL; - segments = 0; - - rq_for_each_integrity_segment(iv, rq, iter) { + bio_for_each_integrity_vec(iv, bio, i) { if (ivprv) { if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (sg->length + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; + sg->length += iv->bv_len; } else { new_segment: @@ -162,6 +179,40 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } EXPORT_SYMBOL(blk_integrity_compare); +int blk_integrity_merge_rq(struct request_queue *q, struct request *req, + struct request *next) +{ + if (blk_integrity_rq(req) != blk_integrity_rq(next)) + return -1; + + if (req->nr_integrity_segments + next->nr_integrity_segments > + q->limits.max_integrity_segments) + return -1; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_rq); + +int blk_integrity_merge_bio(struct request_queue *q, struct request *req, + struct bio *bio) +{ + int nr_integrity_segs; + struct bio *next = bio->bi_next; + + bio->bi_next = NULL; + nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); + bio->bi_next = next; + + if (req->nr_integrity_segments + nr_integrity_segs > + q->limits.max_integrity_segments) + return -1; + + req->nr_integrity_segments += nr_integrity_segs; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_bio); + struct integrity_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_integrity *, char *); diff --git a/block/blk-merge.c b/block/blk-merge.c index 3b0cd4249671..6a725461654d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -205,12 +205,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + goto no_merge; + + if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) + goto no_merge; /* * This will form the start of a new hw segment. Bump both @@ -218,6 +217,12 @@ static inline int ll_new_hw_segment(struct request_queue *q, */ req->nr_phys_segments += nr_phys_segs; return 1; + +no_merge: + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; + return 0; } int ll_back_merge_fn(struct request_queue *q, struct request *req, @@ -301,6 +306,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > queue_max_segments(q)) return 0; + if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -372,9 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; - if (blk_integrity_rq(req) != blk_integrity_rq(next)) - return 0; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. 
merge_requests_fn diff --git a/block/blk-settings.c b/block/blk-settings.c index 8d592b559bd3..f8f2ddf20613 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; @@ -509,6 +510,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->seg_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_integrity_segments = min_not_zero(t->max_integrity_segments, + b->max_integrity_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 001ab18078f5..b014f7739e98 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -112,6 +112,11 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_integrity_segments, (page)); +} + static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) @@ -288,6 +293,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_integrity_segments_entry = { + .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, + .show = queue_max_integrity_segments_show, +}; + static struct queue_sysfs_entry queue_max_segment_size_entry = { .attr = {.name = "max_segment_size", .mode = S_IRUGO }, .show = queue_max_segment_size_show, @@ -375,6 +385,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..6738831ba447 100644 --- a/block/blk.h +++ b/block/blk.h @@ -132,14 +132,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -#define rq_for_each_integrity_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i) - -#endif /* BLK_DEV_INTEGRITY */ - static inline int blk_cpu_to_group(int cpu) { #ifdef CONFIG_SCHED_MC diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8a8f803439e1..10478153641b 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -376,6 +376,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->this_id = sht->this_id; shost->can_queue = sht->can_queue; shost->sg_tablesize = sht->sg_tablesize; + shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ade720422c6..861c0b937ac9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -968,11 +968,13 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, */ 
int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) { - int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask); + struct request *rq = cmd->request; + + int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask); if (error) goto err_exit; - if (blk_bidi_rq(cmd->request)) { + if (blk_bidi_rq(rq)) { struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc( scsi_sdb_cache, GFP_ATOMIC); if (!bidi_sdb) { @@ -980,28 +982,28 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) goto err_exit; } - cmd->request->next_rq->special = bidi_sdb; - error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb, - GFP_ATOMIC); + rq->next_rq->special = bidi_sdb; + error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC); if (error) goto err_exit; } - if (blk_integrity_rq(cmd->request)) { + if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; BUG_ON(prot_sdb == NULL); - ivecs = blk_rq_count_integrity_sg(cmd->request); + ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { error = BLKPREP_DEFER; goto err_exit; } - count = blk_rq_map_integrity_sg(cmd->request, + count = blk_rq_map_integrity_sg(rq->q, rq->bio, prot_sdb->table.sgl); BUG_ON(unlikely(count > ivecs)); + BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q))); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; @@ -1625,6 +1627,14 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, SCSI_MAX_SG_CHAIN_SEGMENTS)); + if (scsi_host_prot_dma(shost)) { + shost->sg_prot_tablesize = + min_not_zero(shost->sg_prot_tablesize, + (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); + BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); + blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + } + blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index c3f67373a4f8..20ad59dff730 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -251,6 +251,7 @@ shost_rd_attr(host_busy, "%hu\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); shost_rd_attr(sg_tablesize, "%hu\n"); +shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(unchecked_isa_dma, "%d\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); @@ -262,6 +263,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, + &dev_attr_sg_prot_tablesize.attr, &dev_attr_unchecked_isa_dma.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103434ad..2c3fd7421607 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,6 +496,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..7e661106270a 100644 --- 
a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1213,8 +1217,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1244,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e72..d63533a4a59e 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,6 +31,12 @@ struct scsi_cmnd; #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS #endif +/* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb7b76e..d0a6a845f204 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. 
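
A minimal sketch, not part of this patch, of how a hypothetical DIX-capable HBA driver ("foo"; all foo_* names are illustrative) might advertise its hardware limit through the new template field:

        static struct scsi_host_template foo_template = {
                .name                   = "foo",
                .queuecommand           = foo_queuecommand,     /* hypothetical */
                .this_id                = -1,
                .sg_tablesize           = 128,
                /* HW limit on protection information SG segments */
                .sg_prot_tablesize      = 128,
        };

Per the scsi_lib.c hunk above, __scsi_alloc_queue() then applies min_not_zero() against SCSI_MAX_PROT_SG_SEGMENTS (so a zero here means "no driver limit") and hands the result to the block layer via blk_queue_max_integrity_segments().
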
@@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, -- cgit v1.2.3 From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:55:25 -0400 Subject: SUNRPC: Fix a race in rpc_info_open There is a race between rpc_info_open and rpc_release_client() in that nothing stops a process from opening the file after the clnt->cl_kref goes to zero. Fix this by using atomic_inc_not_zero()... Reported-by: J. Bruce Fields Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 26 ++++++++++++-------------- net/sunrpc/rpc_pipe.c | 14 ++++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc722a600..85f38a63f098 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - struct kref cl_kref; /* Number of references */ + atomic_t cl_count; /* Number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 657aac630fc9..3a8f53e7ba07 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct rpc_clnt *clnt) * release remaining GSS contexts. This mechanism ensures * that it can do so safely.
*/ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 41a762f82630..8c8eef2b8f26 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } -- cgit v1.2.3 From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 16:51:36 +0200 Subject: workqueue: add documentation Update copyright notice and add Documentation/workqueue.txt. Randy Dunlap, Dave Chinner: misc fixes. Signed-off-by: Tejun Heo Reviewed-By: Florian Mickler Cc: Ingo Molnar Cc: Christoph Lameter Cc: Randy Dunlap Cc: Dave Chinner --- Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/workqueue.h | 4 + kernel/workqueue.c | 27 ++-- 3 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 Documentation/workqueue.txt (limited to 'include') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 000000000000..e4498a2872c3 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. 
Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. + +Although MT wq wasted a lot of resource, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. A MT wq could provide only one execution context per CPU +while a ST wq one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users which require +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + flexible level of concurrency on demand without wasting a lot of + resource. + +* Automatically regulate worker pool and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. + +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. + +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. These worker threads are managed in so +called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages thread-pool and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and appended on the shared worklist of the gcwq. 
For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly that gcwq that is associated to the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity. + +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of the +currently runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost other than the memory space +for kthreads, so cmwq holds onto idle ones for a while before killing +them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +Forward progress guarantee relies on that workers can be created when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. All work items which might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Else it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. @flags +and @max_active control how work items are assigned execution +resources, scheduled and executed. + +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. 
The unbound gcwq + tries to start execution of work items as soon as possible. + Unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using bound wq may end up creating large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler. + + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as execution + resource is available. + + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. + + Although CPU intensive work items don't contribute to the + concurrency level, start of their executions is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is higher of 512 and 4 * num_possible_cpus(). These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. 
The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behave under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU. + w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq. + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. 
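

As a concrete illustration of the API section above, a minimal usage
sketch; alloc_workqueue(), DECLARE_WORK(), queue_work(),
flush_workqueue() and destroy_workqueue() are the real interfaces,
while the frob_* names are made up for the example:

        static struct workqueue_struct *frob_wq;

        static void frob_fn(struct work_struct *work)
        {
                /* runs in process context on the shared worker pool */
        }
        static DECLARE_WORK(frob_work, frob_fn);

        static int __init frob_init(void)
        {
                /* WQ_UNBOUND with @max_active == 1: ST-like strict ordering */
                frob_wq = alloc_workqueue("frob", WQ_UNBOUND, 1);
                if (!frob_wq)
                        return -ENOMEM;

                queue_work(frob_wq, &frob_work);
                return 0;
        }

        static void __exit frob_exit(void)
        {
                flush_workqueue(frob_wq);
                destroy_workqueue(frob_wq);
        }

A wq used on a memory reclaim path would additionally pass WQ_RESCUER,
as the guidelines above require.
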
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..25e02c941bac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. + */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 727f24e563ae..f77afd939229 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. * - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items as are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include -- cgit v1.2.3 From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Sep 2010 23:51:02 +0100 Subject: drm: Use a nondestructive mode for output detect when polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Destructive load-detection is very expensive and due to failings elsewhere can trigger system wide stalls of up to 600ms. A simple first step to correcting this is not to invoke such an expensive and destructive load-detection operation automatically. 
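
The pattern every converted driver below adopts, in miniature; the
foo_* helpers are hypothetical stand-ins for a driver's cheap DDC probe
and its expensive load-detect path:

        static enum drm_connector_status
        foo_detect(struct drm_connector *connector, bool nondestructive)
        {
                if (foo_ddc_probe(connector))           /* cheap EDID/DDC probe */
                        return connector_status_connected;

                if (nondestructive)
                        /* polled path: report last known state, don't steal a pipe */
                        return connector->status;

                return foo_load_detect(connector);      /* expensive, may stall */
        }

In the hunks below, drm_helper_probe_single_connector_modes() passes
false (a user-initiated probe may load-detect), while the periodic
output poll and the sysfs status file pass true.
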
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265 Reported-by: Bruno Prémont Tested-by: Sitsofe Wheeler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_crt.c | 7 ++++++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- drivers/gpu/drm/i915/intel_dvo.c | 4 +++- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++-- drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++-- drivers/gpu/drm/i915/intel_tv.c | 12 ++++++------ drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++++--- drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 3 ++- include/drm/drm_crtc.h | 3 ++- 13 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index de152a58967d..fb6b70fc6572 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, false); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); if (old_status != status) changed = true; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 86118a742231..85da4c40694c 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device, struct drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b7735196cd5..0350e5d711f8 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -400,7 +400,9 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder return status; } -static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_crt_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -419,6 +421,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(encoder)) return connector_status_connected; + if (nondestructive) + return connector->status; + /* for pre-945g platforms use load detect */ if (encoder->crtc && encoder->crtc->enabled) { status = intel_crt_load_detect(encoder->crtc, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 51d142939a26..e1a2a05fb838 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,7 +1386,8 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port 
is disconnected. */ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector) +intel_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a399f4b2c1c5..f0de1addf8a4 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -221,7 +221,9 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * * Unimplemented. */ -static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_dvo_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ccd4c97e6524..2ea123d8d22b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,7 +139,8 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 4fbb0165b26f..fb1bed8f4071 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -445,7 +445,9 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * connected and closed means disconnected. We also send hotplug events as * needed, using lid status notification from the input layer. */ -static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -540,7 +542,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * the LID nofication event. 
*/ if (connector) - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, + true); + /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) return NOTIFY_OK; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e3b7a7ee39cb..db6b6d4b8fae 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector) == + if (analog_connector->funcs->detect(analog_connector, true) == connector_status_disconnected) return false; @@ -1486,7 +1486,9 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) return status; } -static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_sdvo_detect(struct drm_connector *connector, + bool nondestructive) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index c671f60ce80b..d20b550c0f55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,7 +1341,8 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. */ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector) +intel_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1353,7 +1354,7 @@ intel_tv_detect(struct drm_connector *connector) if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else { + } else if (nondestructive) { struct drm_crtc *crtc; int dpms_mode; @@ -1364,10 +1365,9 @@ intel_tv_detect(struct drm_connector *connector) intel_release_load_detect_pipe(&intel_tv->base, connector, dpms_mode); } else - type = -1; - } - - intel_tv->type = type; + return connector_status_unknown; + } else + return connector->status; if (type < 0) return connector_status_disconnected; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a1473fff06ac..67d515cb67e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,7 +168,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector) +nouveau_connector_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -246,7 +247,8 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector) +nouveau_connector_detect_lvds(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -267,7 +269,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector) /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector); + status = nouveau_connector_detect(connector, nondestructive); if (status == connector_status_connected) goto out; } diff 
--git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a9dd7847d96e..31d309a8e75b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -481,7 +481,9 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -594,7 +596,9 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_vga_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -691,7 +695,9 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -748,7 +754,9 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * we have to check if this analog encoder is shared with anyone else (TV) * if its shared we have to set the other connector to disconnected. */ -static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dvi_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -972,7 +980,9 @@ static int radeon_dp_get_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 2ff5cf78235f..a527c91c0ba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) } static enum drm_connector_status - vmw_ldu_connector_detect(struct drm_connector *connector) + vmw_ldu_connector_detect(struct drm_connector *connector, + bool nondestructive) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c9f3cc5949a8..5536223fbac8 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,7 +386,8 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); - enum drm_connector_status (*detect)(struct drm_connector *connector); + enum drm_connector_status (*detect)(struct drm_connector *connector, + bool nondestructive); int (*fill_modes)(struct 
drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); -- cgit v1.2.3 From 637bbdc5b83615ef9f45f50399d1c7f27473c713 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 13 Sep 2010 20:19:03 +0800 Subject: sched: Remove unused PF_ALIGNWARN flag PF_ALIGNWARN is not implemented and it is for 486 as the comment. It is not likely someone will implement this flag feature. So here remove this flag and leave the valuable 0x00000001 for future use. Signed-off-by: Dave Young Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <20100913121903.GB22238@darkstar> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b51c53c285b8..cdf56693ecbf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1682,8 +1682,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -- cgit v1.2.3 From ceee42714cf382e9bb9ab71b846ad49497b29d6c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - mark id_table and description as const Memory pointed to by these fields is not supposed to change. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index b5552568178d..a31c95a3171e 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -55,9 +55,9 @@ struct serio { struct serio_driver { void *private; - char *description; + const char *description; - struct serio_device_id *id_table; + const struct serio_device_id *id_table; bool manual_bind; void (*write_wakeup)(struct serio *); -- cgit v1.2.3 From 7fc49c498c18795d35864bee433caf419bd013b2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index a31c95a3171e..111ad501b054 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -54,7 +54,6 @@ struct serio { #define to_serio_port(d) container_of(d, struct serio, dev) struct serio_driver { - void *private; const char *description; const struct serio_device_id *id_table; -- cgit v1.2.3 From 37b0bb112b7e3ffa5015c4305a934e861b4e2e53 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - mark description as const pointer Memory pointed to by the pointer should not change. 
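
Taken together with the serio changes above, the driver-visible pattern
is the same for both buses; a minimal serio sketch, with hypothetical
foo_* callbacks, showing the now-const fields:

        static const struct serio_device_id foo_serio_ids[] = {
                {
                        .type   = SERIO_RS232,
                        .proto  = SERIO_ANY,
                        .id     = SERIO_ANY,
                        .extra  = SERIO_ANY,
                },
                { 0 }
        };

        static struct serio_driver foo_drv = {
                .driver         = {
                        .name   = "foo",
                },
                .description    = "Example foo driver",  /* const char * now */
                .id_table       = foo_serio_ids,         /* const table now */
                .connect        = foo_connect,           /* hypothetical */
                .disconnect     = foo_disconnect,        /* hypothetical */
        };
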
Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 361d1cc288d0..632d1265fbe0 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -55,7 +55,7 @@ struct gameport { struct gameport_driver { void *private; - char *description; + const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); int (*reconnect)(struct gameport *); -- cgit v1.2.3 From 528487081aad32da85bf99802bdb7af32f4922b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 632d1265fbe0..b65a6f472775 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -53,8 +53,6 @@ struct gameport { #define to_gameport_port(d) container_of(d, struct gameport, dev) struct gameport_driver { - - void *private; const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); -- cgit v1.2.3 From 44432407d9f5e4b2e56c7eccb65d98cad4bba191 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:04 +0000 Subject: fbdev: sh_mobile_lcdcfb: Support multiple video modes in platform data This is a preparation for HDMI hotplug support. This patch just moves all platform-defined video modes for the sh_mobile_lcdcfb driver into separate arrays and switches all users to element 0 of that array; it introduces no functional changes and as such should not cause any regressions.
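Concretely, the platform-data shape changes from one embedded struct fb_videomode to a pointer/count pair; a condensed sketch with invented names is below (the real board files in the diff show the full timings):

#include <linux/kernel.h>
#include <linux/fb.h>
#include <video/sh_mobile_lcdc.h>

/* Hypothetical board code: timings abbreviated for the sketch. */
static const struct fb_videomode example_lcdc_modes[] = {
	{
		.name = "Example WVGA",
		.xres = 800,
		.yres = 480,
		/* margins, sync lengths etc. go here as before */
	},
};

static struct sh_mobile_lcdc_info example_lcdc_info = {
	.ch[0] = {
		.chan	 = LCDC_CHAN_MAINLCD,
		.bpp	 = 16,
		/* previously: one struct fb_videomode embedded here */
		.lcd_cfg = example_lcdc_modes,
		.num_cfg = ARRAY_SIZE(example_lcdc_modes),
	},
};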
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 98 ++++++++++++++++++++--------------- arch/sh/boards/mach-ap325rxa/setup.c | 29 ++++++----- arch/sh/boards/mach-ecovec24/setup.c | 60 ++++++++++++--------- arch/sh/boards/mach-kfr2r09/setup.c | 29 ++++++----- arch/sh/boards/mach-migor/setup.c | 58 +++++++++++---------- arch/sh/boards/mach-se/7724/setup.c | 54 ++++++++++++------- drivers/video/sh_mipi_dsi.c | 32 +++++++----- drivers/video/sh_mobile_hdmi.c | 3 +- drivers/video/sh_mobile_lcdcfb.c | 10 ++-- include/video/sh_mobile_lcdc.h | 3 +- 10 files changed, 219 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 31e5b28ab9b4..4e883e0fb01b 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -375,10 +375,40 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; +const static struct fb_videomode ap4evb_lcdc_modes[] = { + { +#ifdef CONFIG_AP4EVB_QHD + .name = "R63302(QHD)", + .xres = 544, + .yres = 961, + .left_margin = 72, + .right_margin = 600, + .hsync_len = 16, + .upper_margin = 8, + .lower_margin = 8, + .vsync_len = 2, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, +#else + .name = "WVGA Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, +#endif + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, + .lcd_cfg = ap4evb_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), } }; @@ -569,6 +599,31 @@ static struct platform_device fsi_device = { }, }; +const static struct fb_videomode ap4evb_hdmi_modes[] = { + { + .name = "HDMI 720p", + .xres = 1280, + .yres = 720, + + /* + * If left and right margins are not multiples of 8, + * LDHAJR will be adjusted accordingly by the LCDC + * driver. Until we start using EDID, these values + * might have to be adjusted for different monitors. + */ + .left_margin = 200, + .right_margin = 88, + .hsync_len = 48, + + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + + .pixclock = 13468, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -577,26 +632,8 @@ static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "HDMI", - /* So far only 720p is supported */ - .xres = 1280, - .yres = 720, - /* - * If left and right margins are not multiples of 8, - * LDHAJR will be adjusted accordingly by the LCDC - * driver. Until we start using EDID, these values - * might have to be adjusted for different monitors. 
- */ - .left_margin = 200, - .right_margin = 88, - .hsync_len = 48, - .upper_margin = 20, - .lower_margin = 5, - .vsync_len = 5, - .pixclock = 13468, - .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = ap4evb_hdmi_modes, + .num_cfg = ARRAY_SIZE(ap4evb_hdmi_modes), } }; @@ -960,17 +997,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB24; lcdc_info.ch[0].clock_divider = 1; lcdc_info.ch[0].flags = LCDC_FLAGS_DWPOL; - lcdc_info.ch[0].lcd_cfg.name = "R63302(QHD)"; - lcdc_info.ch[0].lcd_cfg.xres = 544; - lcdc_info.ch[0].lcd_cfg.yres = 961; - lcdc_info.ch[0].lcd_cfg.left_margin = 72; - lcdc_info.ch[0].lcd_cfg.right_margin = 600; - lcdc_info.ch[0].lcd_cfg.hsync_len = 16; - lcdc_info.ch[0].lcd_cfg.upper_margin = 8; - lcdc_info.ch[0].lcd_cfg.lower_margin = 8; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; - lcdc_info.ch[0].lcd_cfg.sync = FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_HOR_HIGH_ACT; lcdc_info.ch[0].lcd_size_cfg.width = 44; lcdc_info.ch[0].lcd_size_cfg.height = 79; @@ -1013,16 +1039,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB18; lcdc_info.ch[0].clock_divider = 2; lcdc_info.ch[0].flags = 0; - lcdc_info.ch[0].lcd_cfg.name = "WVGA Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; - lcdc_info.ch[0].lcd_cfg.sync = 0; lcdc_info.ch[0].lcd_size_cfg.width = 152; lcdc_info.ch[0].lcd_size_cfg.height = 91; diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 3da116f47f01..00553233a7c5 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -176,6 +176,21 @@ static void ap320_wvga_power_off(void *board_data) __raw_writew(0, FPGA_LCDREG); } +const static struct fb_videomode ap325rxa_lcdc_modes[] = { + { + .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 32, + .right_margin = 160, + .hsync_len = 8, + .upper_margin = 63, + .lower_margin = 80, + .vsync_len = 1, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -183,18 +198,8 @@ static struct sh_mobile_lcdc_info lcdc_info = { .bpp = 16, .interface_type = RGB18, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 32, - .right_margin = 160, - .hsync_len = 8, - .upper_margin = 63, - .lower_margin = 80, - .vsync_len = 1, - .sync = 0, /* hsync and vsync are active low */ - }, + .lcd_cfg = ap325rxa_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap325rxa_lcdc_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 1d7b495a7db4..feadf5dada77 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -231,14 +231,41 @@ static struct platform_device usb1_common_device = { }; /* LCDC */ +const static struct fb_videomode ecovec_lcd_modes[] = { + { + .name = "Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + +const static struct fb_videomode 
ecovec_dvi_modes[] = { + { + .name = "DVI", + .xres = 1280, + .yres = 720, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 40, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .interface_type = RGB18, .chan = LCDC_CHAN_MAINLCD, .bpp = 16, - .lcd_cfg = { - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -1079,33 +1106,18 @@ static int __init arch_setup(void) if (gpio_get_value(GPIO_PTE6)) { /* DVI */ lcdc_info.clock_source = LCDC_CLK_EXTERNAL; - lcdc_info.ch[0].clock_divider = 1, - lcdc_info.ch[0].lcd_cfg.name = "DVI"; - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 1; + lcdc_info.ch[0].lcd_cfg = ecovec_dvi_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_dvi_modes); gpio_set_value(GPIO_PTA2, 1); gpio_set_value(GPIO_PTU1, 1); } else { /* Panel */ - lcdc_info.clock_source = LCDC_CLK_PERIPHERAL; - lcdc_info.ch[0].clock_divider = 2, - lcdc_info.ch[0].lcd_cfg.name = "Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 2; + lcdc_info.ch[0].lcd_cfg = ecovec_lcd_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_lcd_modes); gpio_set_value(GPIO_PTR1, 1); diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 68994a163f6c..87d4b90e368c 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -126,6 +126,21 @@ static struct platform_device kfr2r09_sh_keysc_device = { }, }; +const static struct fb_videomode kfr2r09_lcdc_modes[] = { + { + .name = "TX07D34VM0AAA", + .xres = 240, + .yres = 400, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .upper_margin = 0, + .lower_margin = 1, + .vsync_len = 1, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .clock_source = LCDC_CLK_BUS, .ch[0] = { @@ -134,18 +149,8 @@ static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .interface_type = SYS18, .clock_divider = 6, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "TX07D34VM0AAA", - .xres = 240, - .yres = 400, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 0, - .lower_margin = 1, - .vsync_len = 1, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - }, + .lcd_cfg = kfr2r09_lcdc_modes, + .num_cfg = ARRAY_SIZE(kfr2r09_lcdc_modes), .lcd_size_cfg = { .width = 35, .height = 58, diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 662debe4ead2..9204cbb87147 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -213,51 +213,55 @@ static struct platform_device migor_nand_flash_device = { } }; +const static struct fb_videomode migor_lcd_modes[] = { + { +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) 
+ .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 64, + .right_margin = 16, + .hsync_len = 120, + .sync = 0, +#elif defined(CONFIG_SH_MIGOR_QVGA) + .name = "PH240320T", + .xres = 320, + .yres = 240, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .sync = FB_SYNC_HOR_HIGH_ACT, +#endif + .upper_margin = 1, + .lower_margin = 17, + .vsync_len = 2, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc_info = { -#ifdef CONFIG_SH_MIGOR_RTA_WVGA +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB16, .clock_divider = 2, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 64, - .right_margin = 16, - .hsync_len = 120, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = 0, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, }, } -#endif -#ifdef CONFIG_SH_MIGOR_QVGA +#elif defined(CONFIG_SH_MIGOR_QVGA) .clock_source = LCDC_CLK_PERIPHERAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = SYS16A, .clock_divider = 10, - .lcd_cfg = { - .name = "PH240320T", - .xres = 320, - .yres = 240, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 2.4 inch */ .width = 49, .height = 37, diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 552ebd9ba82b..3099c36759ad 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -144,16 +144,42 @@ static struct platform_device nor_flash_device = { }; /* LCDC */ +const static struct fb_videomode lcdc_720p_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 1280; + .yres = 720; + .left_margin = 220; + .right_margin = 110; + .hsync_len = 40; + .upper_margin = 20; + .lower_margin = 5; + .vsync_len = 5; + }, +}; + +const static struct fb_videomode lcdc_vga_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 640; + .yres = 480; + .left_margin = 105; + .right_margin = 50; + .hsync_len = 96; + .upper_margin = 33; + .lower_margin = 10; + .vsync_len = 2; + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -909,24 +935,12 @@ static int __init devices_setup(void) if (sw & SW41_B) { /* 720p */ - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].lcd_cfg = lcdc_720p_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_720p_modes); } else { /* VGA */ - lcdc_info.ch[0].lcd_cfg.xres = 640; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 105; - lcdc_info.ch[0].lcd_cfg.right_margin = 50; - lcdc_info.ch[0].lcd_cfg.hsync_len = 96; - lcdc_info.ch[0].lcd_cfg.upper_margin = 
33; - lcdc_info.ch[0].lcd_cfg.lower_margin = 10; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; + lcdc_info.ch[0].lcd_cfg = lcdc_vga_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_vga_modes); } if (sw & SW41_A) { diff --git a/drivers/video/sh_mipi_dsi.c b/drivers/video/sh_mipi_dsi.c index 5699ce0c1780..3f3d431033ca 100644 --- a/drivers/video/sh_mipi_dsi.c +++ b/drivers/video/sh_mipi_dsi.c @@ -123,83 +123,87 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, u32 linelength; bool yuv; - /* Select data format */ + /* + * Select data format. MIPI DSI is not hot-pluggable, so, we just use + * the default videomode. If this ever becomes a problem, We'll have to + * move this to mipi_display_on() above and use info->var.xres + */ switch (pdata->data_format) { case MIPI_RGB888: pctype = 0; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB565: pctype = 1; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_RGB666_LP: pctype = 2; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB666: pctype = 3; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_BGR888: pctype = 8; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR565: pctype = 9; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_BGR666_LP: pctype = 0xa; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR666: pctype = 0xb; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_YUYV: pctype = 4; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_UYVY: pctype = 5; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_YUV420_L: pctype = 6; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; - linelength = (ch->lcd_cfg.xres * 12 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 12 + 7) / 8; yuv = true; break; case MIPI_YUV420: @@ -207,7 +211,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; /* Length of U/V line */ - linelength = (ch->lcd_cfg.xres + 1) / 2; + linelength = (ch->lcd_cfg[0].xres + 1) / 2; yuv = true; break; default: @@ -281,7 +285,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, iowrite32(0x00e00000, base + 0x8024); /* 
VMCTR2 */ /* * 0x660 = 1632 bytes per line (RGB24, 544 pixels: see - * sh_mobile_lcdc_info.ch[0].lcd_cfg.xres), HSALEN = 1 - default + * sh_mobile_lcdc_info.ch[0].lcd_cfg[0].xres), HSALEN = 1 - default * (unused, since VMCTR2[HSABM] = 0) */ iowrite32(1 | (linelength << 16), base + 0x8028); /* VMLEN1 */ diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8117c33e75f..6608429c0fbb 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -777,7 +777,8 @@ static int __init sh_hdmi_probe(struct platform_device *pdev) goto egetclk; } - rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg.pixclock) * 1000; + /* TODO: reconfigure the clock on monitor plug in */ + rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg[0].pixclock) * 1000; rate = clk_round_rate(hdmi->hdmi_clk, rate); if (rate < 0) { diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index e4f6012fe4e3..ce3ed4bc991f 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -989,8 +989,8 @@ static int sh_mobile_lcdc_notify(struct notifier_block *nb, int ret; /* Can we handle this display? */ - if (var->xres > ch->cfg.lcd_cfg.xres || - var->yres > ch->cfg.lcd_cfg.yres) + if (var->xres > ch->cfg.lcd_cfg[0].xres || + var->yres > ch->cfg.lcd_cfg[0].yres) return -ENOMEM; /* Add to the modelist */ @@ -1115,7 +1115,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) info = ch->info; var = &info->var; - lcd_cfg = &cfg->lcd_cfg; + lcd_cfg = &cfg->lcd_cfg[0]; info->fbops = &sh_mobile_lcdc_ops; fb_videomode_to_var(var, lcd_cfg); /* Default Y virtual resolution is 2x panel size */ @@ -1187,8 +1187,8 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) pdev->name, (ch->cfg.chan == LCDC_CHAN_MAINLCD) ? "mainlcd" : "sublcd", - (int) ch->cfg.lcd_cfg.xres, - (int) ch->cfg.lcd_cfg.yres, + (int) ch->cfg.lcd_cfg[0].xres, + (int) ch->cfg.lcd_cfg[0].yres, ch->cfg.bpp); /* deferred io mode: disable clock to save power */ diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h index 55d700e8566e..19c69d788f3b 100644 --- a/include/video/sh_mobile_lcdc.h +++ b/include/video/sh_mobile_lcdc.h @@ -70,7 +70,8 @@ struct sh_mobile_lcdc_chan_cfg { int interface_type; /* selects RGBn or SYSn I/F, see above */ int clock_divider; unsigned long flags; /* LCDC_FLAGS_... */ - struct fb_videomode lcd_cfg; + const struct fb_videomode *lcd_cfg; + int num_cfg; struct sh_mobile_lcdc_lcd_size_cfg lcd_size_cfg; struct sh_mobile_lcdc_board_cfg board_cfg; struct sh_mobile_lcdc_sys_bus_cfg sys_bus_cfg; /* only for SYSn I/F */ -- cgit v1.2.3 From 6de9edd5bde0cdfea12e9948690e53ec669c3018 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:23 +0000 Subject: fbdev: sh_mobile_hdmi: implement locking The SH-Mobile HDMI driver runs in several contexts: ISR, delayed work-queue, and task context (when called from the sh_mobile_lcdc framebuffer driver). This creates ample race possibilities. Even though most of these races are purely theoretical, it is better to close them. To track fb_info validity we install a notification callback in the HDMI driver; the only way for that callback to reach driver-internal data is via struct sh_mobile_lcdc_chan, so that structure had to be extracted into a separate common header.
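The closing idea, sketched schematically below with invented names (this is not the actual driver code), is a single mutex guarding the state that all of these contexts touch, with the fb_info pointer cleared under that mutex when the framebuffer goes away:

#include <linux/fb.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct example_hdmi {
	struct mutex		mutex;		/* protects the fields below */
	struct fb_info		*info;		/* NULL once the fb is gone */
	struct delayed_work	edid_work;
};

static void example_edid_work_fn(struct work_struct *work)
{
	struct example_hdmi *hdmi =
		container_of(work, struct example_hdmi, edid_work.work);

	mutex_lock(&hdmi->mutex);
	if (hdmi->info) {
		/* safe to touch hdmi->info: it cannot vanish under us */
	}
	mutex_unlock(&hdmi->mutex);
}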
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_hdmi.c | 103 +++++++++++++++++++++++++++++++++------ drivers/video/sh_mobile_lcdcfb.c | 46 ++++++----------- drivers/video/sh_mobile_lcdcfb.h | 37 ++++++++++++++ include/video/sh_mobile_lcdc.h | 2 + 4 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 drivers/video/sh_mobile_lcdcfb.h (limited to 'include') diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8cf9a510f30..56e44fd0a3af 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -26,6 +26,8 @@ #include diff --git a/include/linux/idr.h b/include/linux/idr.h index 928ae712709f..13a801f3d028 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -81,6 +81,7 @@ struct idr { #define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) /** + * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must diff --git a/lib/idr.c b/lib/idr.c index e35850d3004a..e15502e8b21e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -106,7 +106,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } /** - * idr_pre_get - reserver resources for idr allocation + * idr_pre_get - reserve resources for idr allocation * @idp: idr handle * @gfp_mask: memory allocation flags * @@ -115,8 +115,8 @@ static void idr_mark_full(struct idr_layer **pa, int id) * caller should pass in GFP_KERNEL if possible. This of course requires that * no spinning locks be held. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { @@ -292,12 +292,12 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) * required locks. * * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return -EAGAIN. The caller should retry the idr_pre_get() call to refill + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill * IDR's preallocation and then retry the idr_get_new_above() call. * - * If the idr is full idr_get_new_above() will return -ENOSPC. + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range @starting_id ... 0x7fffffff + * @id returns a value in the range @starting_id ... %0x7fffffff */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { @@ -322,12 +322,12 @@ EXPORT_SYMBOL(idr_get_new_above); * @id: pointer to the allocated handle * * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return -EAGAIN. The caller should retry the idr_pre_get() call to refill + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill * IDR's preallocation and then retry the idr_get_new_above() call. * - * If the idr is full idr_get_new_above() will return -ENOSPC. + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff + * @id returns a value in the range %0 ... 
%0x7fffffff */ int idr_get_new(struct idr *idp, void *ptr, int *id) { @@ -390,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id) } /** - * idr_remove - remove the given id and free it's slot + * idr_remove - remove the given id and free its slot * @idp: idr handle * @id: unique key */ @@ -439,7 +439,7 @@ EXPORT_SYMBOL(idr_remove); * function will remove all id mappings and leave all idp_layers * unused. * - * A typical clean-up sequence for objects stored in an idr tree, will + * A typical clean-up sequence for objects stored in an idr tree will * use idr_for_each() to free all objects, if necessay, then * idr_remove_all() to remove all ids, and idr_destroy() to free * up the cached idr_layers. @@ -544,7 +544,7 @@ EXPORT_SYMBOL(idr_find); * not allowed. * * We check the return of @fn each time. If it returns anything other - * than 0, we break out and return that value. + * than %0, we break out and return that value. * * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). */ @@ -639,8 +639,8 @@ EXPORT_SYMBOL(idr_get_next); * @id: lookup key * * Replace the pointer registered with an id and return the old value. - * A -ENOENT return indicates that @id was not found. - * A -EINVAL return indicates that @id was not within valid constraints. + * A %-ENOENT return indicates that @id was not found. + * A %-EINVAL return indicates that @id was not within valid constraints. * * The caller must serialize with writers. */ @@ -698,10 +698,11 @@ void idr_init(struct idr *idp) EXPORT_SYMBOL(idr_init); -/* +/** + * DOC: IDA description * IDA - IDR based ID allocator * - * this is id allocator without id -> pointer translation. Memory + * This is id allocator without id -> pointer translation. Memory * usage is much lower than full blown idr because each id only * occupies a bit. ida uses a custom leaf node which contains * IDA_BITMAP_BITS slots. @@ -734,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) * following function. It preallocates enough memory to satisfy the * worst possible allocation. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int ida_pre_get(struct ida *ida, gfp_t gfp_mask) { @@ -767,11 +768,11 @@ EXPORT_SYMBOL(ida_pre_get); * Allocate new ID above or equal to @ida. It should be called with * any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the ida_pre_get() call. If the ida is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @p_id returns a value in the range @starting_id ... 0x7fffffff. + * @p_id returns a value in the range @starting_id ... %0x7fffffff. */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { @@ -853,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above); * * Allocate new ID. It should be called with any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the idr_pre_get() call. If the idr is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff. + * @id returns a value in the range %0 ... %0x7fffffff. 
*/ int ida_get_new(struct ida *ida, int *p_id) { -- cgit v1.2.3 From 47f19a0814e80e1d4e5c17d61b70fca85ea09162 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Oct 2010 17:41:17 +0200 Subject: percpu: Remove the multi-page alignment facility [DECLARE|DEFINE]_PER_CPU_MULTIPAGE_ALIGNED never really worked because the head percpu section was only page aligned. Now that the last user is gone (32-bit IRQ stacks), remove the generic percpu facility. Cc: Brian Gerst Acked-by: Tejun Heo Acked-by: Linus Torvalds LKML-Reference: <1288158182-1753-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/percpu-defs.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 018db9a62ffe..27ef6b190ea6 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -147,18 +147,6 @@ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..readmostly") -/* - * Declaration/definition used for large per-CPU variables that must be - * aligned to something larger than the pagesize. - */ -#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ - DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ - __aligned(size) - -#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ - DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ - __aligned(size) - /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 3a5f65df5a0fcbaa35e5417c0420d691fee4ac56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Oct 2010 17:28:36 +0100 Subject: Typedef SMP call function pointer Typedef the pointer to the function to be called by smp_call_function() and friends: typedef void (*smp_call_func_t)(void *info); as it is used in a fair number of places. 
Signed-off-by: David Howells cc: linux-arch@vger.kernel.org --- include/linux/smp.h | 19 ++++++++++--------- kernel/smp.c | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index cfa2d20e35f1..6dc95cac6b3d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -13,9 +13,10 @@ extern void cpu_idle(void); +typedef void (*smp_call_func_t)(void *info); struct call_single_data { struct list_head list; - void (*func) (void *info); + smp_call_func_t func; void *info; u16 flags; u16 priv; @@ -24,8 +25,8 @@ struct call_single_data { /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait); +int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, + int wait); #ifdef CONFIG_SMP @@ -69,15 +70,15 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(void(*func)(void *info), void *info, int wait); +int smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *info), void *info, bool wait); + smp_call_func_t func, void *info, bool wait); void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait); + smp_call_func_t func, void *info, int wait); /* * Generic and arch helpers @@ -94,7 +95,7 @@ void ipi_call_unlock_irq(void); /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int wait); +int on_each_cpu(smp_call_func_t func, void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -122,7 +123,7 @@ static inline void smp_send_stop(void) { } * These macros fold the SMP functionality into a single CPU system */ #define raw_smp_processor_id() 0 -static inline int up_smp_call_function(void (*func)(void *), void *info) +static inline int up_smp_call_function(smp_call_func_t func, void *info) { return 0; } @@ -143,7 +144,7 @@ static inline void smp_send_reschedule(int cpu) { } static inline void init_call_single_data(void) { } static inline int -smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), +smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { return smp_call_function_single(0, func, info, wait); diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7ef..12ed8b013e2d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); * * Returns 0 on success, else a negative status code. */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, +int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { struct call_single_data d = { @@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); * 3) any other online cpu in @mask */ int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait) + smp_call_func_t func, void *info, int wait) { unsigned int cpu; const struct cpumask *nodemask; @@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, * must be disabled when calling this function. 
*/ void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *), void *info, bool wait) + smp_call_func_t func, void *info, bool wait) { struct call_function_data *data; unsigned long flags; @@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(void (*func)(void *), void *info, int wait) +int smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); -- cgit v1.2.3 From e28c31a96b1570f17731b18e8efabb7308d0c22c Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Wed, 27 Oct 2010 17:55:04 +0100 Subject: xen: register xen pci notifier Register a pci notifier to add (or remove) pci devices to Xen via hypercalls. Xen needs to know the pci devices present in the system to handle pci passthrough and even MSI remapping in the initial domain. Signed-off-by: Weidong Han Signed-off-by: Qing He Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- drivers/xen/Makefile | 1 + drivers/xen/pci.c | 117 ++++++++++++++++++++++++++++++++++++++++ include/xen/interface/physdev.h | 21 ++++++++ 3 files changed, 139 insertions(+) create mode 100644 drivers/xen/pci.c (limited to 'include') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index b97864551718..eb8a78d77d9d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pci.o diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c new file mode 100644 index 000000000000..cef4bafc07dc --- /dev/null +++ b/drivers/xen/pci.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Weidong Han + */ + +#include +#include +#include +#include + +#include +#include +#include "../pci/pci.h" + +static int xen_add_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_virtfn = 1, + .physfn.bus = pci_dev->physfn->bus->number, + .physfn.devfn = pci_dev->physfn->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_extfn = 1, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, + &manage_pci); + } + + return r; +} + +static int xen_remove_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + + return r; +} + +static int xen_pci_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_add_device(dev); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_remove_device(dev); + break; + default: + break; + } + + return r; +} + +struct notifier_block device_nb = { + .notifier_call = xen_pci_notifier, +}; + +static int __init register_xen_pci_notifier(void) +{ + if (!xen_initial_domain()) + return 0; + + return bus_register_notifier(&pci_bus_type, &device_nb); +} + +arch_initcall(register_xen_pci_notifier); diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a85d76c2e360..2b2c66c3df00 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -136,6 +136,27 @@ struct physdev_unmap_pirq { int pirq; }; +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v1.2.3 From 1c31720a74e19bb57f301350a3b03210fa2ba9e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:07 +0000 Subject: ipv4: add __rcu annotations to routes.c Add __rcu annotations to : (struct dst_entry)->rt_next (struct rt_hash_bucket)->chain And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 2 +- net/ipv4/route.c | 75 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index a217c838ec0d..ffe9cb719c0e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -95,7 +95,7 @@ struct dst_entry { unsigned long lastuse; union { struct dst_entry *next; - struct rtable *rt_next; + struct rtable __rcu *rt_next; struct rt6_info *rt6_next; struct dn_route *dn_next; }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6cb2bfcd8e1..987bf9adb318 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = { */ struct rt_hash_bucket { - struct rtable *chain; + struct rtable __rcu *chain; }; #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ @@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rt_hash_table[st->bucket].chain) + if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->dst.rt_next; + r = rcu_dereference_bh(r->dst.rt_next); while (!r) { rcu_read_unlock_bh(); do { if (--st->bucket < 0) return NULL; - } while (!rt_hash_table[st->bucket].chain); + } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); - r = rt_hash_table[st->bucket].chain; + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } - return rcu_dereference_bh(r); + return r; } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -721,19 +721,23 @@ static void rt_do_flush(int process_context) for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) cond_resched(); - rth = rt_hash_table[i].chain; + rth = rcu_dereference_raw(rt_hash_table[i].chain); if (!rth) continue; spin_lock_bh(rt_hash_lock_addr(i)); #ifdef CONFIG_NET_NS { - struct rtable ** prev, * p; + struct rtable __rcu **prev; + struct rtable *p; - rth = rt_hash_table[i].chain; + rth = rcu_dereference_protected(rt_hash_table[i].chain, + lockdep_is_held(rt_hash_lock_addr(i))); /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->dst.rt_next) + for (tail = rth; tail; + tail = rcu_dereference_protected(tail->dst.rt_next, + lockdep_is_held(rt_hash_lock_addr(i)))) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -741,8 +745,12 @@ static void rt_do_flush(int process_context) /* call rt_free on entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; - for (p = *prev; p; p = next) { - next = p->dst.rt_next; + for (p = rcu_dereference_protected(*prev, + lockdep_is_held(rt_hash_lock_addr(i))); + p != NULL; + p = next) { + next = rcu_dereference_protected(p->dst.rt_next, + lockdep_is_held(rt_hash_lock_addr(i))); if (!rt_is_expired(p)) { prev = &p->dst.rt_next; } else { @@ -752,14 +760,15 @@ static void rt_do_flush(int process_context) } } #else - rth = rt_hash_table[i].chain; - rt_hash_table[i].chain = NULL; + rth = rcu_dereference_protected(rt_hash_table[i].chain, + lockdep_is_held(rt_hash_lock_addr(i))); + rcu_assign_pointer(rt_hash_table[i].chain, NULL); tail = NULL; #endif spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->dst.rt_next; 
+ next = rcu_dereference_protected(rth->dst.rt_next, 1); rt_free(rth); } } @@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->dst.rt_next; + aux = rcu_dereference_protected(aux->dst.rt_next, 1); } return ONE; } @@ -799,7 +808,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -825,11 +835,12 @@ static void rt_check_expire(void) samples++; - if (*rthp == NULL) + if (rcu_dereference_raw(*rthp) == NULL) continue; length = 0; spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->dst.rt_next; @@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops) static unsigned long last_gc; static int rover; static int equilibrium; - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; unsigned long now = jiffies; int goal; int entries = dst_entries_get_fast(&ipv4_dst_ops); @@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops) k = (k + 1) & rt_hash_mask; rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; @@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->dst.rt_next; + rth = rcu_dereference_protected(rth->dst.rt_next, 1); } return length >> FRACT_BITS; } @@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head) static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb, int ifindex) { - struct rtable *rth, **rthp; + struct rtable *rth, *cand; + struct rtable __rcu **rthp, **candp; unsigned long now; - struct rtable *cand, **candp; u32 min_score; int chain_length; int attempts = !in_softirq(); @@ -1128,7 +1141,8 @@ restart: rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { if (rt_is_expired(rth)) { *rthp = rth->dst.rt_next; rt_free(rth); @@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident); static void rt_del(unsigned hash, struct rtable *rt) { - struct rtable **rthp, *aux; + struct rtable __rcu **rthp; + struct rtable *aux; rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); - while ((aux = *rthp) != NULL) { + while ((aux = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { if (aux == rt || rt_is_expired(aux)) { *rthp = aux->dst.rt_next; rt_free(aux); @@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, { int i, k; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; struct netevent_redirect netevent; @@ -1379,7 +1396,7 @@ void 
ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], rt_genid(net)); - rthp=&rt_hash_table[hash].chain; + rthp = &rt_hash_table[hash].chain; while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; -- cgit v1.2.3 From e0ad61ec867fdd262804afa7a68e11fc9930c2b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:28 +0000 Subject: net: add __rcu annotations to protocol Add __rcu annotations to : struct net_protocol *inet_protos struct net_protocol *inet6_protos And use appropriate casts to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- net/ipv4/protocol.c | 8 +++++--- net/ipv6/protocol.c | 8 +++++--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index f1effdd3c265..dc07495bce4c 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -89,10 +89,10 @@ struct inet_protosw { #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ -extern const struct net_protocol *inet_protos[MAX_INET_PROTOS]; +extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -extern const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; +extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; #endif extern int inet_add_protocol(const struct net_protocol *prot, unsigned char num); diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 65699c24411c..9ae5c01cd0b2 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include #include -const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly; +const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; /* * Add a protocol handler to the hash tables @@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { int hash = protocol & (MAX_INET_PROTOS - 1); - return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1; + return !cmpxchg((const struct net_protocol **)&inet_protos[hash], + NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet_add_protocol); @@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret, hash = protocol & (MAX_INET_PROTOS - 1); - ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1; + ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], + prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9bb936ae2452..9a7978fdc02a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -25,13 +25,14 @@ #include #include -const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly; +const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int hash = protocol & (MAX_INET_PROTOS - 1); - return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1; + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + NULL, prot) ? 
0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol { int ret, hash = protocol & (MAX_INET_PROTOS - 1); - ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1; + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + prot, NULL) == prot) ? 0 : -1; synchronize_net(); -- cgit v1.2.3 From b33eab08445d86c3d0dec3111ce10df561328705 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:01:26 +0000 Subject: tunnels: add __rcu annotations Add __rcu annotations to : (struct ip_tunnel)->prl (struct ip_tunnel_prl_entry)->next (struct xfrm_tunnel)->next struct xfrm_tunnel *tunnel4_handlers struct xfrm_tunnel *tunnel64_handlers And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipip.h | 4 ++-- include/net/xfrm.h | 2 +- net/ipv4/tunnel4.c | 29 +++++++++++++++++++---------- 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index 0403fe4c4519..a32654d52730 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -34,12 +34,12 @@ struct ip_tunnel { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif - struct ip_tunnel_prl_entry *prl; /* potential router list */ + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ }; struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry *next; + struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ffcd47820a5b..bcfb6b24b019 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1264,7 +1264,7 @@ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, u32 info); - struct xfrm_tunnel *next; + struct xfrm_tunnel __rcu *next; int priority; }; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 9a17bd2a0a37..ac3b3ee4b07c 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -14,27 +14,32 @@ #include #include -static struct xfrm_tunnel *tunnel4_handlers __read_mostly; -static struct xfrm_tunnel *tunnel64_handlers __read_mostly; +static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly; +static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly; static DEFINE_MUTEX(tunnel4_mutex); -static inline struct xfrm_tunnel **fam_handlers(unsigned short family) +static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family) { return (family == AF_INET) ? 
&tunnel4_handlers : &tunnel64_handlers; } int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) { - struct xfrm_tunnel **pprev; + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -EEXIST; int priority = handler->priority; mutex_lock(&tunnel4_mutex); - for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { - if ((*pprev)->priority > priority) + for (pprev = fam_handlers(family); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&tunnel4_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) break; - if ((*pprev)->priority == priority) + if (t->priority == priority) goto err; } @@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) { - struct xfrm_tunnel **pprev; + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; int ret = -ENOENT; mutex_lock(&tunnel4_mutex); - for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { - if (*pprev == handler) { + for (pprev = fam_handlers(family); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&tunnel4_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { *pprev = handler->next; ret = 0; break; -- cgit v1.2.3 From 7a2b03c5175e9ddcc2a2d48ca86dea8a88b68383 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2010 09:24:55 +0000 Subject: fib_rules: __rcu annotates ctarget Adds __rcu annotation to (struct fib_rule)->ctarget Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 +- net/core/fib_rules.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 106f3097d384..075f1e3a0fed 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -20,7 +20,7 @@ struct fib_rule { u32 table; u8 action; u32 target; - struct fib_rule * ctarget; + struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct rcu_head rcu; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 12b43cc2f889..82a4369ae150 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { - rule->ctarget = r; + RCU_INIT_POINTER(rule->ctarget, r); break; } } - if (rule->ctarget == NULL) + if (rcu_dereference_protected(rule->ctarget, 1) == NULL) unresolved = 1; } else if (rule->action == FR_ACT_GOTO) goto errout_free; @@ -386,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref) { - BUG_ON(r->ctarget != NULL); + BUG_ON(rtnl_dereference(r->ctarget) != NULL); rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; @@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { - if (tmp->ctarget == rule) { + if (rtnl_dereference(tmp->ctarget) == rule) { rcu_assign_pointer(tmp->ctarget, NULL); ops->unresolved_rules++; } @@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->action = rule->action; frh->flags = rule->flags; - if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) + if (rule->action == 
FR_ACT_GOTO && + rcu_dereference_raw(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { -- cgit v1.2.3 From b914c4ea929a4ba6fb97967800dc473c31552b98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 23:55:38 +0000 Subject: inetpeer: __rcu annotations Adds __rcu annotations to inetpeer (struct inet_peer)->avl_left (struct inet_peer)->avl_right This is a tedious cleanup, but removes one smp_wmb() from link_to_pool() since we now use more self documenting rcu_assign_pointer(). Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in all cases we dont need a memory barrier. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- net/ipv4/inetpeer.c | 138 ++++++++++++++++++++++++++++--------------------- 2 files changed, 81 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 417d0c894f29..fe239bfe5f7f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,7 +15,7 @@ struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer *avl_left, *avl_right; + struct inet_peer __rcu *avl_left, *avl_right; __be32 v4daddr; /* peer's address */ __u32 avl_height; struct list_head unused; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9ffa24b9a804..9e94d7cf4f8a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty, - .avl_right = peer_avl_empty, + .avl_left = peer_avl_empty_rcu, + .avl_right = peer_avl_empty_rcu, .avl_height = 0 }; static struct { - struct inet_peer *root; + struct inet_peer __rcu *root; spinlock_t lock; int total; } peers = { - .root = peer_avl_empty, + .root = peer_avl_empty_rcu, .lock = __SPIN_LOCK_UNLOCKED(peers.lock), .total = 0, }; @@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p) */ #define lookup(_daddr, _stack) \ ({ \ - struct inet_peer *u, **v; \ + struct inet_peer *u; \ + struct inet_peer __rcu **v; \ \ stackptr = _stack; \ *stackptr++ = &peers.root; \ - for (u = peers.root; u != peer_avl_empty; ) { \ + for (u = rcu_dereference_protected(peers.root, \ + lockdep_is_held(&peers.lock)); \ + u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p) else \ v = &u->avl_right; \ *stackptr++ = v; \ - u = *v; \ + u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ } \ u; \ }) @@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) /* Called with local BH disabled and the pool lock held. 
*/ #define lookup_rightempty(start) \ ({ \ - struct inet_peer *u, **v; \ + struct inet_peer *u; \ + struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ - for (u = *v; u->avl_right != peer_avl_empty; ) { \ + for (u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ + u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ - u = *v; \ + u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ } \ u; \ }) @@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) * Variable names are the proof of operation correctness. * Look into mm/map_avl.c for more detail description of the ideas. */ -static void peer_avl_rebalance(struct inet_peer **stack[], - struct inet_peer ***stackend) +static void peer_avl_rebalance(struct inet_peer __rcu **stack[], + struct inet_peer __rcu ***stackend) { - struct inet_peer **nodep, *node, *l, *r; + struct inet_peer __rcu **nodep; + struct inet_peer *node, *l, *r; int lh, rh; while (stackend > stack) { nodep = *--stackend; - node = *nodep; - l = node->avl_left; - r = node->avl_right; + node = rcu_dereference_protected(*nodep, + lockdep_is_held(&peers.lock)); + l = rcu_dereference_protected(node->avl_left, + lockdep_is_held(&peers.lock)); + r = rcu_dereference_protected(node->avl_right, + lockdep_is_held(&peers.lock)); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; - ll = l->avl_left; - lr = l->avl_right; + ll = rcu_dereference_protected(l->avl_left, + lockdep_is_held(&peers.lock)); + lr = rcu_dereference_protected(l->avl_right, + lockdep_is_held(&peers.lock)); lrh = node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ - node->avl_left = lr; /* lr: RH or RH+1 */ - node->avl_right = r; /* r: RH */ + RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - l->avl_left = ll; /* ll: RH+1 */ - l->avl_right = node; /* node: RH+1 or RH+2 */ + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ + RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ l->avl_height = node->avl_height + 1; - *nodep = l; + RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ - lrl = lr->avl_left; /* lrl: RH or RH-1 */ - lrr = lr->avl_right; /* lrr: RH or RH-1 */ - node->avl_left = lrr; /* lrr: RH or RH-1 */ - node->avl_right = r; /* r: RH */ + lrl = rcu_dereference_protected(lr->avl_left, + lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ + lrr = rcu_dereference_protected(lr->avl_right, + lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ + RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ - l->avl_left = ll; /* ll: RH */ - l->avl_right = lrl; /* lrl: RH or RH-1 */ + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ + RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ l->avl_height = rh + 1; /* l: RH+1 */ - lr->avl_left = l; /* l: RH+1 */ - lr->avl_right = node; /* node: RH+1 */ + RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ + RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ lr->avl_height = rh + 2; - *nodep = lr; + RCU_INIT_POINTER(*nodep, lr); } } else if (rh > lh + 1) { /* r: LH+2 */ struct inet_peer *rr, *rl, *rlr, *rll; int rlh; - rr = r->avl_right; - rl = r->avl_left; + rr = rcu_dereference_protected(r->avl_right, + lockdep_is_held(&peers.lock)); + rl = 
rcu_dereference_protected(r->avl_left, + lockdep_is_held(&peers.lock)); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ - node->avl_right = rl; /* rl: LH or LH+1 */ - node->avl_left = l; /* l: LH */ + RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - r->avl_right = rr; /* rr: LH+1 */ - r->avl_left = node; /* node: LH+1 or LH+2 */ + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ + RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ r->avl_height = node->avl_height + 1; - *nodep = r; + RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ - rlr = rl->avl_right; /* rlr: LH or LH-1 */ - rll = rl->avl_left; /* rll: LH or LH-1 */ - node->avl_right = rll; /* rll: LH or LH-1 */ - node->avl_left = l; /* l: LH */ + rlr = rcu_dereference_protected(rl->avl_right, + lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ + rll = rcu_dereference_protected(rl->avl_left, + lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ + RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = lh + 1; /* node: LH+1 */ - r->avl_right = rr; /* rr: LH */ - r->avl_left = rlr; /* rlr: LH or LH-1 */ + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ + RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ r->avl_height = lh + 1; /* r: LH+1 */ - rl->avl_right = r; /* r: LH+1 */ - rl->avl_left = node; /* node: LH+1 */ + RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ + RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ rl->avl_height = lh + 2; - *nodep = rl; + RCU_INIT_POINTER(*nodep, rl); } } else { node->avl_height = (lh > rh ? lh : rh) + 1; @@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[], #define link_to_pool(n) \ do { \ n->avl_height = 1; \ - n->avl_left = peer_avl_empty; \ - n->avl_right = peer_avl_empty; \ - smp_wmb(); /* lockless readers can catch us now */ \ - **--stackptr = n; \ + n->avl_left = peer_avl_empty_rcu; \ + n->avl_right = peer_avl_empty_rcu; \ + /* lockless readers can catch us now */ \ + rcu_assign_pointer(**--stackptr, n); \ peer_avl_rebalance(stack, stackptr); \ } while (0) @@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p) * We use refcnt=-1 to alert lockless readers this entry is deleted. */ if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer **stack[PEER_MAXDEPTH]; - struct inet_peer ***stackptr, ***delp; + struct inet_peer __rcu **stack[PEER_MAXDEPTH]; + struct inet_peer __rcu ***stackptr, ***delp; if (lookup(p->v4daddr, stack) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty) { + if (p->avl_left == peer_avl_empty_rcu) { *delp[0] = p->avl_right; --stackptr; } else { /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p); - BUG_ON(*stackptr[-1] != t); + BUG_ON(rcu_dereference_protected(*stackptr[-1], + lockdep_is_held(&peers.lock)) != t); **--stackptr = t->avl_left; /* t is removed, t->v4daddr > x->v4daddr for any * x in p->avl_left subtree. * Put t in the old place of p. 
*/ - *delp[0] = t; + RCU_INIT_POINTER(*delp[0], t); t->avl_left = p->avl_left; t->avl_right = p->avl_right; t->avl_height = p->avl_height; @@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *inet_getpeer(__be32 daddr, int create) { struct inet_peer *p; - struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; + struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. -- cgit v1.2.3 From a10c02036f82b1fa30d69a62f7c7d9a927e8adbc Mon Sep 17 00:00:00 2001 From: Amarnath Revanna Date: Wed, 27 Oct 2010 08:34:39 +0000 Subject: caif-u5500: Adding shared memory include Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/caif_shm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/net/caif/caif_shm.h (limited to 'include') diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h new file mode 100644 index 000000000000..5bcce55438cf --- /dev/null +++ b/include/net/caif/caif_shm.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#ifndef CAIF_SHM_H_ +#define CAIF_SHM_H_ + +struct shmdev_layer { + u32 shm_base_addr; + u32 shm_total_sz; + u32 shm_id; + u32 shm_loopback; + void *hmbx; + int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); + int (*pshmdev_mbxsetup) (void *pshmdrv_cb, + struct shmdev_layer *pshm_dev, void *pshm_drv); + struct net_device *pshm_netdev; +}; + +extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); +extern void caif_shmcore_remove(struct net_device *pshm_netdev); + +#endif -- cgit v1.2.3 From c5b1f0d92c36851aca09ac6c7c0c4f9690ac14f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Oct 2010 15:46:08 +0200 Subject: locks/nfsd: allocate file lock outside of spinlock As suggested by Christoph Hellwig, this moves allocation of new file locks out of generic_setlease into the callers, nfs4_open_delegation and fcntl_setlease in order to allow GFP_KERNEL allocations when lock_flocks has become a spinlock. Signed-off-by: Arnd Bergmann Acked-by: J. Bruce Fields --- fs/locks.c | 36 ++++++++++++------------------------ fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- include/linux/fs.h | 1 + 3 files changed, 28 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/locks.c b/fs/locks.c index 8b2b6ad56a09..0391d2ff5a4e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -162,10 +162,11 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; /* Allocate an empty lock structure. 
*/ -static struct file_lock *locks_alloc_lock(void) +struct file_lock *locks_alloc_lock(void) { return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(locks_alloc_lock); void locks_release_private(struct file_lock *fl) { @@ -1365,7 +1366,6 @@ int fcntl_getlease(struct file *filp) int generic_setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; - struct file_lock *new_fl = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; @@ -1385,11 +1385,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) lease = *flp; if (arg != F_UNLCK) { - error = -ENOMEM; - new_fl = locks_alloc_lock(); - if (new_fl == NULL) - goto out; - error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; @@ -1434,7 +1429,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) goto out; } - error = 0; if (arg == F_UNLCK) goto out; @@ -1442,15 +1436,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - locks_copy_lock(new_fl, lease); - locks_insert_lock(before, new_fl); - - *flp = new_fl; + locks_insert_lock(before, lease); return 0; out: - if (new_fl != NULL) - locks_free_lock(new_fl); + locks_free_lock(lease); return error; } EXPORT_SYMBOL(generic_setlease); @@ -1514,26 +1504,24 @@ EXPORT_SYMBOL_GPL(vfs_setlease); */ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) { - struct file_lock fl, *flp = &fl; + struct file_lock *fl; struct inode *inode = filp->f_path.dentry->d_inode; int error; - locks_init_lock(&fl); - error = lease_init(filp, arg, &fl); - if (error) - return error; + fl = lease_alloc(filp, arg); + if (IS_ERR(fl)) + return PTR_ERR(fl); lock_flocks(); - - error = __vfs_setlease(filp, arg, &flp); + error = __vfs_setlease(filp, arg, &fl); if (error || arg == F_UNLCK) goto out_unlock; - error = fasync_helper(fd, filp, 1, &flp->fl_fasync); + error = fasync_helper(fd, filp, 1, &fl->fl_fasync); if (error < 0) { /* remove lease just inserted by setlease */ - flp->fl_type = F_UNLCK | F_INPROGRESS; - flp->fl_break_time = jiffies - 10; + fl->fl_type = F_UNLCK | F_INPROGRESS; + fl->fl_break_time = jiffies - 10; time_out_leases(inode); goto out_unlock; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9019e8ec9dc8..56347e0ac88d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2614,7 +2614,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; int cb_up = atomic_read(&sop->so_client->cl_cb_set); - struct file_lock fl, *flp = &fl; + struct file_lock *fl; int status, flag = 0; flag = NFS4_OPEN_DELEGATE_NONE; @@ -2648,20 +2648,24 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta flag = NFS4_OPEN_DELEGATE_NONE; goto out; } - locks_init_lock(&fl); - fl.fl_lmops = &nfsd_lease_mng_ops; - fl.fl_flags = FL_LEASE; - fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; - fl.fl_end = OFFSET_MAX; - fl.fl_owner = (fl_owner_t)dp; - fl.fl_file = find_readable_file(stp->st_file); - BUG_ON(!fl.fl_file); - fl.fl_pid = current->tgid; + status = -ENOMEM; + fl = locks_alloc_lock(); + if (!fl) + goto out; + locks_init_lock(fl); + fl->fl_lmops = &nfsd_lease_mng_ops; + fl->fl_flags = FL_LEASE; + fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? 
F_RDLCK: F_WRLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = (fl_owner_t)dp; + fl->fl_file = find_readable_file(stp->st_file); + BUG_ON(!fl->fl_file); + fl->fl_pid = current->tgid; /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { + if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); flag = NFS4_OPEN_DELEGATE_NONE; diff --git a/include/linux/fs.h b/include/linux/fs.h index bb20373d0b46..8d7de08ab546 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1113,6 +1113,7 @@ extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); +extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); -- cgit v1.2.3 From f7347ce4ee7c65415f84be915c018473e7076f31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Oct 2010 12:38:12 -0400 Subject: fasync: re-organize fasync entry insertion to allow it under a spinlock You currently cannot use "fasync_helper()" in an atomic environment to insert a new fasync entry, because it will need to allocate the new "struct fasync_struct". Yet fcntl_setlease() wants to call this under lock_flocks(), which is in the process of being converted from the BKL to a spinlock. In order to fix this, this abstracts out the actual fasync list insertion and the fasync allocations into functions of their own, and teaches fs/locks.c to pre-allocate the fasync_struct entry. That way the actual list insertion can happen while holding the required spinlock. Signed-off-by: Linus Torvalds [bfields@redhat.com: rebase on top of my changes to Arnd's patch] Tested-by: J. Bruce Fields Signed-off-by: Arnd Bergmann --- fs/fcntl.c | 66 +++++++++++++++++++++++++++++++++++++++++------------- fs/locks.c | 18 ++++++++++++++- include/linux/fs.h | 5 +++++ 3 files changed, 72 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/fcntl.c b/fs/fcntl.c index f8cc34f542c3..dcdbc6f5c33b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head) * match the state "is the filp on a fasync list". * */ -static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) +int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; int result = 0; @@ -666,21 +666,28 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) return result; } +struct fasync_struct *fasync_alloc(void) +{ + return kmem_cache_alloc(fasync_cache, GFP_KERNEL); +} + /* - * Add a fasync entry. Return negative on error, positive if - * added, and zero if did nothing but change an existing one. - * - * NOTE! It is very important that the FASYNC flag always - * match the state "is the filp on a fasync list". + * NOTE! This can be used only for unused fasync entries: + * entries that actually got inserted on the fasync list + * need to be released by rcu - see fasync_remove_entry. 
*/ -static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) +void fasync_free(struct fasync_struct *new) { - struct fasync_struct *new, *fa, **fp; - int result = 0; + kmem_cache_free(fasync_cache, new); +} - new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); - if (!new) - return -ENOMEM; +/* + * Insert a new entry into the fasync list. Return the pointer to the + * old one if we didn't use the new one. + */ +struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) +{ + struct fasync_struct *fa, **fp; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); @@ -691,8 +698,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa spin_lock_irq(&fa->fa_lock); fa->fa_fd = fd; spin_unlock_irq(&fa->fa_lock); - - kmem_cache_free(fasync_cache, new); goto out; } @@ -702,13 +707,42 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa new->fa_fd = fd; new->fa_next = *fapp; rcu_assign_pointer(*fapp, new); - result = 1; filp->f_flags |= FASYNC; out: spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); - return result; + return fa; +} + +/* + * Add a fasync entry. Return negative on error, positive if + * added, and zero if did nothing but change an existing one. + * + * NOTE! It is very important that the FASYNC flag always + * match the state "is the filp on a fasync list". + */ +static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) +{ + struct fasync_struct *new; + + new = fasync_alloc(); + if (!new) + return -ENOMEM; + + /* + * fasync_insert_entry() returns the old (update) entry if + * it existed. + * + * So free the (unused) new entry and return 0 to let the + * caller know that we didn't add any new fasync entries. + */ + if (fasync_insert_entry(fd, filp, fapp, new)) { + fasync_free(new); + return 0; + } + + return 1; } /* diff --git a/fs/locks.c b/fs/locks.c index 0391d2ff5a4e..85fd9ce1abae 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1505,6 +1505,7 @@ EXPORT_SYMBOL_GPL(vfs_setlease); int fcntl_setlease(unsigned int fd, struct file *filp, long arg) { struct file_lock *fl; + struct fasync_struct *new; struct inode *inode = filp->f_path.dentry->d_inode; int error; @@ -1512,12 +1513,25 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) if (IS_ERR(fl)) return PTR_ERR(fl); + new = fasync_alloc(); + if (!new) { + locks_free_lock(fl); + return -ENOMEM; + } lock_flocks(); error = __vfs_setlease(filp, arg, &fl); if (error || arg == F_UNLCK) goto out_unlock; - error = fasync_helper(fd, filp, 1, &fl->fl_fasync); + /* + * fasync_insert_entry() returns the old entry if any. + * If there was no old entry, then it used 'new' and + * inserted it into the fasync list. Clear new so that + * we don't release it here. 
+ */ + if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) + new = NULL; + if (error < 0) { /* remove lease just inserted by setlease */ fl->fl_type = F_UNLCK | F_INPROGRESS; @@ -1529,6 +1543,8 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); out_unlock: unlock_flocks(); + if (new) + fasync_free(new); return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d7de08ab546..56285e5e1de4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1302,6 +1302,11 @@ struct fasync_struct { /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); +extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); +extern int fasync_remove_entry(struct file *, struct fasync_struct **); +extern struct fasync_struct *fasync_alloc(void); +extern void fasync_free(struct fasync_struct *); + /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -- cgit v1.2.3 From a8e23a291852cd7c4fb5ca696dbb93912185ad10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:57 -0700 Subject: mm,x86: fix kmap_atomic_push vs ioremap_32.c It appears i386 uses kmap_atomic infrastructure regardless of CONFIG_HIGHMEM which results in a compile error when highmem is disabled. Cure this by providing the needed few bits for both CONFIG_HIGHMEM and CONFIG_X86_32. Signed-off-by: Peter Zijlstra Reported-by: Chris Wilson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 46 +++++++++++++++++++++++++--------------------- mm/highmem.c | 6 +++++- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8a85ec109a3a..102f76be90da 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -37,27 +37,6 @@ extern unsigned long totalhigh_pages; void kmap_flush_unused(void); -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __get_cpu_var(__kmap_atomic_idx)++; -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx > KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx_pop(void) -{ - int idx = --__get_cpu_var(__kmap_atomic_idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(idx < 0); -#endif - return idx; -} - #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -95,6 +74,31 @@ static inline void __kunmap_atomic(void *addr) #endif /* CONFIG_HIGHMEM */ +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) + +DECLARE_PER_CPU(int, __kmap_atomic_idx); + +static inline int kmap_atomic_idx_push(void) +{ + int idx = __get_cpu_var(__kmap_atomic_idx)++; +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + BUG_ON(idx > KM_TYPE_NR); +#endif + return idx; +} + +static inline int kmap_atomic_idx_pop(void) +{ + int idx = --__get_cpu_var(__kmap_atomic_idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(idx < 0); +#endif + return idx; +} + +#endif + /* * Make both: kmap_atomic(page, idx) and kmap_atomic(page) work. 
*/ diff --git a/mm/highmem.c b/mm/highmem.c index 781e754a75ac..693394daa2ed 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,6 +29,11 @@ #include #include + +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +DEFINE_PER_CPU(int, __kmap_atomic_idx); +#endif + /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -43,7 +48,6 @@ unsigned long totalhigh_pages __read_mostly; EXPORT_SYMBOL(totalhigh_pages); -DEFINE_PER_CPU(int, __kmap_atomic_idx); EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); unsigned int nr_free_highpages (void) -- cgit v1.2.3 From 20273941f2129aa5a432796d98a276ed73d60782 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:58 -0700 Subject: mm: fix race in kunmap_atomic() Christoph reported a nice splat which illustrated a race in the new stack based kmap_atomic implementation. The problem is that we pop our stack slot before we're completely done resetting its state -- in particular clearing the PTE (sometimes that's CONFIG_DEBUG_HIGHMEM). If an interrupt happens before we actually clear the PTE used for the last slot, that interrupt can reuse the slot in a dirty state, which triggers a BUG in kmap_atomic(). Fix this by introducing kmap_atomic_idx() which reports the current slot index without actually releasing it and use that to find the PTE and delay the _pop() until after we're completely done. Signed-off-by: Peter Zijlstra Reported-by: Christoph Hellwig Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/highmem.c | 3 ++- arch/frv/mm/highmem.c | 3 ++- arch/mips/mm/highmem.c | 3 ++- arch/mn10300/include/asm/highmem.h | 4 +++- arch/powerpc/mm/highmem.c | 4 +++- arch/sparc/mm/highmem.c | 4 +++- arch/tile/mm/highmem.c | 3 ++- arch/x86/mm/highmem_32.c | 3 ++- arch/x86/mm/iomap_32.c | 3 ++- include/linux/highmem.h | 5 +++++ 10 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index c00f119babbf..c435fd9e1da9 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -89,7 +89,7 @@ void __kunmap_atomic(void *kvaddr) int idx, type; if (kvaddr >= (void *)FIXADDR_START) { - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) @@ -101,6 +101,7 @@ void __kunmap_atomic(void *kvaddr) #else (void) idx; /* to kill a warning */ #endif + kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index 61088dcc1594..fd7fcd4c2e33 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(__kmap_atomic); void __kunmap_atomic(void *kvaddr) { - int type = kmap_atomic_idx_pop(); + int type = kmap_atomic_idx(); switch (type) { case 0: __kunmap_atomic_primary(4, 6); break; case 1: __kunmap_atomic_primary(5, 7); break; @@ -83,6 +83,7 @@ void __kunmap_atomic(void *kvaddr) default: BUG(); } + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 1e69b1fb4b85..3634c7ea06ac 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { int idx = 
type + KM_TYPE_NR * smp_processor_id(); @@ -89,6 +89,7 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_one(vaddr); } #endif + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index f577ba2268ca..e2155e686451 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -101,7 +101,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #if HIGHMEM_DEBUG { @@ -119,6 +119,8 @@ static inline void __kunmap_atomic(unsigned long vaddr) __flush_tlb_one(vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index b0848b462bbc..e7450bdbe83a 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -62,7 +62,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -79,6 +79,8 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_page(NULL, vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 5e50c09b7dce..4730eac0747b 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -75,7 +75,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -104,6 +104,8 @@ void __kunmap_atomic(void *kvaddr) #endif } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 8ef6595e162c..abb57331cf6e 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -241,7 +241,7 @@ void __kunmap_atomic(void *kvaddr) pte_t pteval = *pte; int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR*smp_processor_id(); /* @@ -252,6 +252,7 @@ void __kunmap_atomic(void *kvaddr) BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); kmap_atomic_unregister(pte_page(pteval), vaddr); kpte_clear_flush(pte, vaddr); + kmap_atomic_idx_pop(); } else { /* Must be a lowmem page */ BUG_ON(vaddr < PAGE_OFFSET); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d723e369003c..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -87,6 +87,7 @@ void __kunmap_atomic(void *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 75a3d7f24a2c..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -98,7 +98,7 @@ iounmap_atomic(void __iomem *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -111,6 +111,7 @@ iounmap_atomic(void __iomem *kvaddr) * attributes or becomes a protected page in a hypervisor. 
*/ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } pagefault_enable(); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 102f76be90da..e9138198e823 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -88,6 +88,11 @@ static inline int kmap_atomic_idx_push(void) return idx; } +static inline int kmap_atomic_idx(void) +{ + return __get_cpu_var(__kmap_atomic_idx) - 1; +} + static inline int kmap_atomic_idx_pop(void) { int idx = --__get_cpu_var(__kmap_atomic_idx); -- cgit v1.2.3 From aeec56e331c6d2750de02ef34b305338305ca690 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:15 -0700 Subject: gpio: add driver for basic memory-mapped GPIO controllers The basic GPIO controllers may be found in various on-board FPGA and ASIC solutions that are used to control board's switches, LEDs, chip-selects, Ethernet/USB PHY power, etc. These controllers may not provide any means of pin setup (in/out/open drain). The driver supports: - 8/16/32/64 bits registers; - GPIO controllers with clear/set registers; - GPIO controllers with a single "data" register; - Big endian bits/GPIOs ordering (mostly used on PowerPC). Signed-off-by: Anton Vorontsov Reviewed-by: Mark Brown Cc: David Brownell Cc: Samuel Ortiz , Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 5 + drivers/gpio/Makefile | 1 + drivers/gpio/basic_mmio_gpio.c | 297 ++++++++++++++++++++++++++++++++++++++++ include/linux/basic_mmio_gpio.h | 20 +++ 4 files changed, 323 insertions(+) create mode 100644 drivers/gpio/basic_mmio_gpio.c create mode 100644 include/linux/basic_mmio_gpio.h (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 510aa2054544..e47ef94be379 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -70,6 +70,11 @@ config GPIO_MAX730X comment "Memory mapped GPIO expanders:" +config GPIO_BASIC_MMIO + tristate "Basic memory-mapped GPIO controllers support" + help + Say yes here to support basic memory-mapped GPIO controllers. + config GPIO_IT8761E tristate "IT8761E GPIO support" depends on GPIOLIB diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index fc6019d93720..3ff0651fd173 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o +obj-$(CONFIG_GPIO_BASIC_MMIO) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX730X) += max730x.o obj-$(CONFIG_GPIO_MAX7300) += max7300.o diff --git a/drivers/gpio/basic_mmio_gpio.c b/drivers/gpio/basic_mmio_gpio.c new file mode 100644 index 000000000000..3addea65894e --- /dev/null +++ b/drivers/gpio/basic_mmio_gpio.c @@ -0,0 +1,297 @@ +/* + * Driver for basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ....``.```~~~~````.`.`.`.`.```````'',,,.........`````......`....... + * ...`` ```````.. + * ..The simplest form of a GPIO controller that the driver supports is`` + * `.just a single "data" register, where GPIO state can be read and/or ` + * `,..written. 
,,..``~~~~ .....``.`.`.~~.```.`.........``````.``````` + * ````````` + ___ +_/~~|___/~| . ```~~~~~~ ___/___\___ ,~.`.`.`.`````.~~...,,,,... +__________|~$@~~~ %~ /o*o*o*o*o*o\ .. Implementing such a GPIO . +o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` + `....trivial..'~`.```.``` + * ``````` + * .```````~~~~`..`.``.``. + * . The driver supports `... ,..```.`~~~```````````````....````.``,, + * . big-endian notation, just`. .. A bit more sophisticated controllers , + * . register the device with -be`. .with a pair of set/clear-bit registers , + * `.. suffix. ```~~`````....`.` . affecting the data register and the .` + * ``.`.``...``` ```.. output pins are also supported.` + * ^^ `````.`````````.,``~``~``~~`````` + * . ^^ + * ,..`.`.`...````````````......`.`.`.`.`.`..`.`.`.. + * .. The expectation is that in at least some cases . ,-~~~-, + * .this will be used with roll-your-own ASIC/FPGA .` \ / + * .logic in Verilog or VHDL. ~~~`````````..`````~~` \ / + * ..````````......``````````` \o_ + * | + * ^^ / \ + * + * ...`````~~`.....``.`..........``````.`.``.```........``. + * ` 8, 16, 32 and 64 bits registers are supported, and``. + * . the number of GPIOs is determined by the width of ~ + * .. the registers. ,............```.`.`..`.`.~~~.`.`.`~ + * `.......````.``` + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct bgpio_chip { + struct gpio_chip gc; + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + + /* Number of bits (GPIOs): * 8. */ + int bits; + + /* + * Some GPIO controllers work with the big-endian bits notation, + * e.g. in a 8-bits register, GPIO7 is the least significant bit. + */ + int big_endian_bits; + + /* + * Used to lock bgpio_chip->data. Also, this is needed to keep + * shadowed and real data registers writes together. + */ + spinlock_t lock; + + /* Shadowed data register to clear/set bits safely. 
*/ + unsigned long data; +}; + +static struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct bgpio_chip, gc); +} + +static unsigned long bgpio_in(struct bgpio_chip *bgc) +{ + switch (bgc->bits) { + case 8: + return __raw_readb(bgc->reg_dat); + case 16: + return __raw_readw(bgc->reg_dat); + case 32: + return __raw_readl(bgc->reg_dat); +#if BITS_PER_LONG >= 64 + case 64: + return __raw_readq(bgc->reg_dat); +#endif + } + return -EINVAL; +} + +static void bgpio_out(struct bgpio_chip *bgc, void __iomem *reg, + unsigned long data) +{ + switch (bgc->bits) { + case 8: + __raw_writeb(data, reg); + return; + case 16: + __raw_writew(data, reg); + return; + case 32: + __raw_writel(data, reg); + return; +#if BITS_PER_LONG >= 64 + case 64: + __raw_writeq(data, reg); + return; +#endif + } +} + +static unsigned long bgpio_pin2mask(struct bgpio_chip *bgc, unsigned int pin) +{ + if (bgc->big_endian_bits) + return 1 << (bgc->bits - 1 - pin); + else + return 1 << pin; +} + +static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + + return bgpio_in(bgc) & bgpio_pin2mask(bgc, gpio); +} + +static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + unsigned long mask = bgpio_pin2mask(bgc, gpio); + unsigned long flags; + + if (bgc->reg_set) { + if (val) + bgpio_out(bgc, bgc->reg_set, mask); + else + bgpio_out(bgc, bgc->reg_clr, mask); + return; + } + + spin_lock_irqsave(&bgc->lock, flags); + + if (val) + bgc->data |= mask; + else + bgc->data &= ~mask; + + bgpio_out(bgc, bgc->reg_dat, bgc->data); + + spin_unlock_irqrestore(&bgc->lock, flags); +} + +static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + return 0; +} + +static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + bgpio_set(gc, gpio, val); + return 0; +} + +static int __devinit bgpio_probe(struct platform_device *pdev) +{ + const struct platform_device_id *platid = platform_get_device_id(pdev); + struct device *dev = &pdev->dev; + struct bgpio_pdata *pdata = dev_get_platdata(dev); + struct bgpio_chip *bgc; + struct resource *res_dat; + struct resource *res_set; + struct resource *res_clr; + resource_size_t dat_sz; + int bits; + int ret; + + res_dat = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); + if (!res_dat) + return -EINVAL; + + dat_sz = resource_size(res_dat); + if (!is_power_of_2(dat_sz)) + return -EINVAL; + + bits = dat_sz * 8; + if (bits > BITS_PER_LONG) + return -EINVAL; + + bgc = devm_kzalloc(dev, sizeof(*bgc), GFP_KERNEL); + if (!bgc) + return -ENOMEM; + + bgc->reg_dat = devm_ioremap(dev, res_dat->start, dat_sz); + if (!bgc->reg_dat) + return -ENOMEM; + + res_set = platform_get_resource_byname(pdev, IORESOURCE_MEM, "set"); + res_clr = platform_get_resource_byname(pdev, IORESOURCE_MEM, "clr"); + if (res_set && res_clr) { + if (resource_size(res_set) != resource_size(res_clr) || + resource_size(res_set) != dat_sz) + return -EINVAL; + + bgc->reg_set = devm_ioremap(dev, res_set->start, dat_sz); + bgc->reg_clr = devm_ioremap(dev, res_clr->start, dat_sz); + if (!bgc->reg_set || !bgc->reg_clr) + return -ENOMEM; + } else if (res_set || res_clr) { + return -EINVAL; + } + + spin_lock_init(&bgc->lock); + + bgc->bits = bits; + bgc->big_endian_bits = !strcmp(platid->name, "basic-mmio-gpio-be"); + bgc->data = bgpio_in(bgc); + + bgc->gc.ngpio = bits; + bgc->gc.direction_input = bgpio_dir_in; + bgc->gc.direction_output = bgpio_dir_out; + bgc->gc.get = 
bgpio_get; + bgc->gc.set = bgpio_set; + bgc->gc.dev = dev; + bgc->gc.label = dev_name(dev); + + if (pdata) + bgc->gc.base = pdata->base; + else + bgc->gc.base = -1; + + dev_set_drvdata(dev, bgc); + + ret = gpiochip_add(&bgc->gc); + if (ret) + dev_err(dev, "gpiochip_add() failed: %d\n", ret); + + return ret; +} + +static int __devexit bgpio_remove(struct platform_device *pdev) +{ + struct bgpio_chip *bgc = dev_get_drvdata(&pdev->dev); + + return gpiochip_remove(&bgc->gc); +} + +static const struct platform_device_id bgpio_id_table[] = { + { "basic-mmio-gpio", }, + { "basic-mmio-gpio-be", }, + {}, +}; +MODULE_DEVICE_TABLE(platform, bgpio_id_table); + +static struct platform_driver bgpio_driver = { + .driver = { + .name = "basic-mmio-gpio", + }, + .id_table = bgpio_id_table, + .probe = bgpio_probe, + .remove = __devexit_p(bgpio_remove), +}; + +static int __init bgpio_init(void) +{ + return platform_driver_register(&bgpio_driver); +} +module_init(bgpio_init); + +static void __exit bgpio_exit(void) +{ + platform_driver_unregister(&bgpio_driver); +} +module_exit(bgpio_exit); + +MODULE_DESCRIPTION("Driver for basic memory-mapped GPIO controllers"); +MODULE_AUTHOR("Anton Vorontsov "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h new file mode 100644 index 000000000000..198087a16fc4 --- /dev/null +++ b/include/linux/basic_mmio_gpio.h @@ -0,0 +1,20 @@ +/* + * Basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __BASIC_MMIO_GPIO_H +#define __BASIC_MMIO_GPIO_H + +struct bgpio_pdata { + int base; +}; + +#endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3 From 09cd9527d621640d4dd231dff77b681e711d8e4b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:16 -0700 Subject: gpiolib: fix HAVE_GPIO_LIB leftovers in asm-generic/gpio.h commit 7444a72effa632fcd8edc566f88 ("gpiolib: allow user-selection") removed HAVE_GPIO_LIB Kconfig symbol, but the header file still uses the name [to confuse readers wrt #ifdef/#else/#endif location]. The real Kconfig symbol nowadays is CONFIG_GPIOLIB. Signed-off-by: Anton Vorontsov Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/gpio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 8ca18e26d7e3..ff5c66080c8c 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -210,7 +210,7 @@ extern void gpio_unexport(unsigned gpio); #endif /* CONFIG_GPIO_SYSFS */ -#else /* !CONFIG_HAVE_GPIO_LIB */ +#else /* !CONFIG_GPIOLIB */ static inline int gpio_is_valid(int number) { @@ -239,7 +239,7 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } -#endif /* !CONFIG_HAVE_GPIO_LIB */ +#endif /* !CONFIG_GPIOLIB */ #ifndef CONFIG_GPIO_SYSFS -- cgit v1.2.3 From ead6db084392349ad33323b1bb2916058dd7e82b Mon Sep 17 00:00:00 2001 From: Miguel Gaio Date: Wed, 27 Oct 2010 15:33:18 -0700 Subject: gpio: add support for 74x164 serial-in/parallel-out 8-bit shift register Add support for generic 74x164 serial-in/parallel-out 8-bits shift register. 
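For illustration only, not part of this patch: the driver below binds by name, so a board file would typically describe the part via SPI board info and hand over its platform data, roughly as in the sketch after this note. The bus number, chip select, speed and GPIO base are hypothetical values.

#include <linux/spi/spi.h>
#include <linux/spi/74x164.h>

static struct gen_74x164_chip_platform_data board_74x164_pdata = {
	.base		= 128,	/* hypothetical number for the first GPIO */
};

static struct spi_board_info board_spi_devs[] __initdata = {
	{
		.modalias	= "74x164",	/* GEN_74X164_DRIVER_NAME */
		.max_speed_hz	= 1000000,	/* hypothetical bus speed */
		.bus_num	= 1,		/* hypothetical SPI bus */
		.chip_select	= 0,
		.platform_data	= &board_74x164_pdata,
	},
};

static int __init board_register_74x164(void)
{
	/* called once from the board's init code */
	return spi_register_board_info(board_spi_devs,
				       ARRAY_SIZE(board_spi_devs));
}
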
This driver can be used as a GPIO output expander. [akpm@linux-foundation.org: remove unused local `refresh'] Signed-off-by: Miguel Gaio Signed-off-by: Juhos Gabor Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/74x164.c | 182 +++++++++++++++++++++++++++++++++++++++++++++ drivers/gpio/Kconfig | 8 ++ drivers/gpio/Makefile | 1 + include/linux/spi/74x164.h | 11 +++ 4 files changed, 202 insertions(+) create mode 100644 drivers/gpio/74x164.c create mode 100644 include/linux/spi/74x164.h (limited to 'include') diff --git a/drivers/gpio/74x164.c b/drivers/gpio/74x164.c new file mode 100644 index 000000000000..d91ff4c282e9 --- /dev/null +++ b/drivers/gpio/74x164.c @@ -0,0 +1,182 @@ +/* + * 74Hx164 - Generic serial-in/parallel-out 8-bits shift register GPIO driver + * + * Copyright (C) 2010 Gabor Juhos + * Copyright (C) 2010 Miguel Gaio + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#define GEN_74X164_GPIO_COUNT 8 + + +struct gen_74x164_chip { + struct spi_device *spi; + struct gpio_chip gpio_chip; + struct mutex lock; + u8 port_config; +}; + +static void gen_74x164_set_value(struct gpio_chip *, unsigned, int); + +static struct gen_74x164_chip *gpio_to_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gen_74x164_chip, gpio_chip); +} + +static int __gen_74x164_write_config(struct gen_74x164_chip *chip) +{ + return spi_write(chip->spi, + &chip->port_config, sizeof(chip->port_config)); +} + +static int gen_74x164_direction_output(struct gpio_chip *gc, + unsigned offset, int val) +{ + gen_74x164_set_value(gc, offset, val); + return 0; +} + +static int gen_74x164_get_value(struct gpio_chip *gc, unsigned offset) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + int ret; + + mutex_lock(&chip->lock); + ret = (chip->port_config >> offset) & 0x1; + mutex_unlock(&chip->lock); + + return ret; +} + +static void gen_74x164_set_value(struct gpio_chip *gc, + unsigned offset, int val) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + + mutex_lock(&chip->lock); + if (val) + chip->port_config |= (1 << offset); + else + chip->port_config &= ~(1 << offset); + + __gen_74x164_write_config(chip); + mutex_unlock(&chip->lock); +} + +static int __devinit gen_74x164_probe(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + struct gen_74x164_chip_platform_data *pdata; + int ret; + + pdata = spi->dev.platform_data; + if (!pdata || !pdata->base) { + dev_dbg(&spi->dev, "incorrect or missing platform data\n"); + return -EINVAL; + } + + /* + * bits_per_word cannot be configured in platform data + */ + spi->bits_per_word = 8; + + ret = spi_setup(spi); + if (ret < 0) + return ret; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + mutex_init(&chip->lock); + + dev_set_drvdata(&spi->dev, chip); + + chip->spi = spi; + + chip->gpio_chip.label = GEN_74X164_DRIVER_NAME, + chip->gpio_chip.direction_output = gen_74x164_direction_output; + chip->gpio_chip.get = gen_74x164_get_value; + chip->gpio_chip.set = gen_74x164_set_value; + chip->gpio_chip.base = pdata->base; + chip->gpio_chip.ngpio = GEN_74X164_GPIO_COUNT; + chip->gpio_chip.can_sleep = 1; + chip->gpio_chip.dev = &spi->dev; + chip->gpio_chip.owner = THIS_MODULE; + + ret = __gen_74x164_write_config(chip); + if (ret) { + dev_err(&spi->dev, "Failed 
writing: %d\n", ret); + goto exit_destroy; + } + + ret = gpiochip_add(&chip->gpio_chip); + if (ret) + goto exit_destroy; + + return ret; + +exit_destroy: + dev_set_drvdata(&spi->dev, NULL); + mutex_destroy(&chip->lock); + kfree(chip); + return ret; +} + +static int gen_74x164_remove(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + int ret; + + chip = dev_get_drvdata(&spi->dev); + if (chip == NULL) + return -ENODEV; + + dev_set_drvdata(&spi->dev, NULL); + + ret = gpiochip_remove(&chip->gpio_chip); + if (!ret) { + mutex_destroy(&chip->lock); + kfree(chip); + } else + dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", + ret); + + return ret; +} + +static struct spi_driver gen_74x164_driver = { + .driver = { + .name = GEN_74X164_DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = gen_74x164_probe, + .remove = __devexit_p(gen_74x164_remove), +}; + +static int __init gen_74x164_init(void) +{ + return spi_register_driver(&gen_74x164_driver); +} +subsys_initcall(gen_74x164_init); + +static void __exit gen_74x164_exit(void) +{ + spi_unregister_driver(&gen_74x164_driver); +} +module_exit(gen_74x164_exit); + +MODULE_AUTHOR("Gabor Juhos "); +MODULE_AUTHOR("Miguel Gaio "); +MODULE_DESCRIPTION("GPIO expander driver for 74X164 8-bits shift register"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e47ef94be379..bc7b0fca6415 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -344,6 +344,14 @@ config GPIO_MC33880 SPI driver for Freescale MC33880 high-side/low-side switch. This provides GPIO interface supporting inputs and outputs. +config GPIO_74X164 + tristate "74x164 serial-in/parallel-out 8-bits shift register" + depends on SPI_MASTER + help + Platform driver for 74x164 compatible serial-in/parallel-out + 8-outputs shift registers. This driver can be used to provide access + to more gpio outputs. + comment "AC97 GPIO expanders:" config GPIO_UCB1400 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 3ff0651fd173..0c23a4dd45e5 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MC33880) += mc33880.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o +obj-$(CONFIG_GPIO_74X164) += 74x164.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o obj-$(CONFIG_GPIO_PL061) += pl061.o diff --git a/include/linux/spi/74x164.h b/include/linux/spi/74x164.h new file mode 100644 index 000000000000..d85c52f294a0 --- /dev/null +++ b/include/linux/spi/74x164.h @@ -0,0 +1,11 @@ +#ifndef LINUX_SPI_74X164_H +#define LINUX_SPI_74X164_H + +#define GEN_74X164_DRIVER_NAME "74x164" + +struct gen_74x164_chip_platform_data { + /* number assigned to the first GPIO */ + unsigned base; +}; + +#endif -- cgit v1.2.3 From 459773ae8dbbd480886d186181c6bc2e8556025f Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:19 -0700 Subject: gpio: adp5588-gpio: support interrupt controller Implement irq_chip functionality on ADP5588/5587 GPIO expanders. Only level sensitive interrupts are supported. Interrupts provided by this irq_chip must be requested using request_threaded_irq(). 
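For illustration only, not part of this patch: a rough sketch of how a consumer might hook up one expander pin as a level interrupt. The GPIO number is hypothetical, and a valid irq_base in the platform data is assumed.

#include <linux/gpio.h>
#include <linux/interrupt.h>

#define EXPANDER_GPIO	200	/* hypothetical pin on the ADP5588 */

static irqreturn_t expander_pin_thread(int irq, void *dev_id)
{
	/* Runs in thread context, so sleeping I2C accesses are fine. */
	return IRQ_HANDLED;
}

static int expander_pin_init(void)
{
	int irq, err;

	err = gpio_request(EXPANDER_GPIO, "expander-pin");
	if (err)
		return err;

	err = gpio_direction_input(EXPANDER_GPIO);
	if (err)
		goto err_free;

	irq = gpio_to_irq(EXPANDER_GPIO);	/* via adp5588_gpio_to_irq() */
	if (irq < 0) {
		err = irq;
		goto err_free;
	}

	/* NULL hard handler: all work happens in the threaded handler,
	 * since acknowledging the expander requires I2C transfers. Only
	 * level types are accepted by this irq_chip. */
	err = request_threaded_irq(irq, NULL, expander_pin_thread,
				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
				   "expander-pin", NULL);
	if (err)
		goto err_free;

	return 0;

err_free:
	gpio_free(EXPANDER_GPIO);
	return err;
}
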
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 7 ++ drivers/gpio/adp5588-gpio.c | 277 ++++++++++++++++++++++++++++++++++++++++---- include/linux/i2c/adp5588.h | 15 +++ 3 files changed, 278 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index bc7b0fca6415..3f3181dac8d7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -272,6 +272,13 @@ config GPIO_ADP5588 To compile this driver as a module, choose M here: the module will be called adp5588-gpio. +config GPIO_ADP5588_IRQ + bool "Interrupt controller support for ADP5588" + depends on GPIO_ADP5588=y + help + Say yes here to enable the adp5588 to be used as an interrupt + controller. It requires the driver to be built in the kernel. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c index 2e8e9e24f887..0871f78af593 100644 --- a/drivers/gpio/adp5588-gpio.c +++ b/drivers/gpio/adp5588-gpio.c @@ -1,8 +1,8 @@ /* * GPIO Chip driver for Analog Devices - * ADP5588 I/O Expander and QWERTY Keypad Controller + * ADP5588/ADP5587 I/O Expander and QWERTY Keypad Controller * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2010 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -13,21 +13,34 @@ #include #include #include +#include +#include #include -#define DRV_NAME "adp5588-gpio" -#define MAXGPIO 18 -#define ADP_BANK(offs) ((offs) >> 3) -#define ADP_BIT(offs) (1u << ((offs) & 0x7)) +#define DRV_NAME "adp5588-gpio" + +/* + * Early pre 4.0 Silicon required to delay readout by at least 25ms, + * since the Event Counter Register updated 25ms after the interrupt + * asserted. 
+ */ +#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4) struct adp5588_gpio { struct i2c_client *client; struct gpio_chip gpio_chip; struct mutex lock; /* protect cached dir, dat_out */ + /* protect serialized access to the interrupt controller bus */ + struct mutex irq_lock; unsigned gpio_start; + unsigned irq_base; uint8_t dat_out[3]; uint8_t dir[3]; + uint8_t int_lvl[3]; + uint8_t int_en[3]; + uint8_t irq_mask[3]; + uint8_t irq_stat[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -55,8 +68,8 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) - & ADP_BIT(off)); + return !!(adp5588_gpio_read(dev->client, + GPIO_DAT_STAT1 + ADP5588_BANK(off)) & ADP5588_BIT(off)); } static void adp5588_gpio_set_value(struct gpio_chip *chip, @@ -66,8 +79,8 @@ static void adp5588_gpio_set_value(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); if (val) @@ -87,10 +100,10 @@ static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); + bank = ADP5588_BANK(off); mutex_lock(&dev->lock); - dev->dir[bank] &= ~ADP_BIT(off); + dev->dir[bank] &= ~ADP5588_BIT(off); ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); mutex_unlock(&dev->lock); @@ -105,8 +118,8 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); dev->dir[bank] |= bit; @@ -125,6 +138,213 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, return ret; } +#ifdef CONFIG_GPIO_ADP5588_IRQ +static int adp5588_gpio_to_irq(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + return dev->irq_base + off; +} + +static void adp5588_irq_bus_lock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + mutex_lock(&dev->irq_lock); +} + + /* + * genirq core code can issue chip->mask/unmask from atomic context. + * This doesn't work for slow busses where an access needs to sleep. + * bus_sync_unlock() is therefore called outside the atomic context, + * syncs the current irq mask state with the slow external controller + * and unlocks the bus. 
+ */ + +static void adp5588_irq_bus_sync_unlock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + int i; + + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + if (dev->int_en[i] ^ dev->irq_mask[i]) { + dev->int_en[i] = dev->irq_mask[i]; + adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, + dev->int_en[i]); + } + + mutex_unlock(&dev->irq_lock); +} + +static void adp5588_irq_mask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] &= ~ADP5588_BIT(gpio); +} + +static void adp5588_irq_unmask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] |= ADP5588_BIT(gpio); +} + +static int adp5588_irq_set_type(unsigned int irq, unsigned int type) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + uint16_t gpio = irq - dev->irq_base; + unsigned bank, bit; + + if ((type & IRQ_TYPE_EDGE_BOTH)) { + dev_err(&dev->client->dev, "irq %d: unsupported type %d\n", + irq, type); + return -EINVAL; + } + + bank = ADP5588_BANK(gpio); + bit = ADP5588_BIT(gpio); + + if (type & IRQ_TYPE_LEVEL_HIGH) + dev->int_lvl[bank] |= bit; + else if (type & IRQ_TYPE_LEVEL_LOW) + dev->int_lvl[bank] &= ~bit; + else + return -EINVAL; + + adp5588_gpio_direction_input(&dev->gpio_chip, gpio); + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, + dev->int_lvl[bank]); + + return 0; +} + +static struct irq_chip adp5588_irq_chip = { + .name = "adp5588", + .mask = adp5588_irq_mask, + .unmask = adp5588_irq_unmask, + .bus_lock = adp5588_irq_bus_lock, + .bus_sync_unlock = adp5588_irq_bus_sync_unlock, + .set_type = adp5588_irq_set_type, +}; + +static int adp5588_gpio_read_intstat(struct i2c_client *client, u8 *buf) +{ + int ret = i2c_smbus_read_i2c_block_data(client, GPIO_INT_STAT1, 3, buf); + + if (ret < 0) + dev_err(&client->dev, "Read INT_STAT Error\n"); + + return ret; +} + +static irqreturn_t adp5588_irq_handler(int irq, void *devid) +{ + struct adp5588_gpio *dev = devid; + unsigned status, bank, bit, pending; + int ret; + status = adp5588_gpio_read(dev->client, INT_STAT); + + if (status & ADP5588_GPI_INT) { + ret = adp5588_gpio_read_intstat(dev->client, dev->irq_stat); + if (ret < 0) + memset(dev->irq_stat, 0, ARRAY_SIZE(dev->irq_stat)); + + for (bank = 0; bank <= ADP5588_BANK(ADP5588_MAXGPIO); + bank++, bit = 0) { + pending = dev->irq_stat[bank] & dev->irq_mask[bank]; + + while (pending) { + if (pending & (1 << bit)) { + handle_nested_irq(dev->irq_base + + (bank << 3) + bit); + pending &= ~(1 << bit); + + } + bit++; + } + } + } + + adp5588_gpio_write(dev->client, INT_STAT, status); /* Status is W1C */ + + return IRQ_HANDLED; +} + +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + unsigned gpio; + int ret; + + adp5588_gpio_write(client, CFG, ADP5588_AUTO_INC); + adp5588_gpio_write(client, INT_STAT, -1); /* status is W1C */ + adp5588_gpio_read_intstat(client, dev->irq_stat); /* read to clear */ + + dev->irq_base = pdata->irq_base; + mutex_init(&dev->irq_lock); + + for (gpio = 0; gpio < dev->gpio_chip.ngpio; gpio++) { + int irq = gpio + dev->irq_base; + set_irq_chip_data(irq, dev); + set_irq_chip_and_handler(irq, &adp5588_irq_chip, + handle_level_irq); + set_irq_nested_thread(irq, 1); +#ifdef CONFIG_ARM + /* + * ARM needs us to explicitly flag the IRQ as VALID, + * once we do so, it 
will also set the noprobe. + */ + set_irq_flags(irq, IRQF_VALID); +#else + set_irq_noprobe(irq); +#endif + } + + ret = request_threaded_irq(client->irq, + NULL, + adp5588_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(&client->dev), dev); + if (ret) { + dev_err(&client->dev, "failed to request irq %d\n", + client->irq); + goto out; + } + + dev->gpio_chip.to_irq = adp5588_gpio_to_irq; + adp5588_gpio_write(client, CFG, + ADP5588_AUTO_INC | ADP5588_INT_CFG | ADP5588_GPI_INT); + + return 0; + +out: + dev->irq_base = 0; + return ret; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ + if (dev->irq_base) + free_irq(dev->client->irq, dev); +} + +#else +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + dev_warn(&client->dev, "interrupt support not compiled in\n"); + + return 0; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ +} +#endif /* CONFIG_GPIO_ADP5588_IRQ */ + static int __devinit adp5588_gpio_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -160,37 +380,46 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, gc->can_sleep = 1; gc->base = pdata->gpio_start; - gc->ngpio = MAXGPIO; + gc->ngpio = ADP5588_MAXGPIO; gc->label = client->name; gc->owner = THIS_MODULE; mutex_init(&dev->lock); - ret = adp5588_gpio_read(dev->client, DEV_ID); if (ret < 0) goto err; revid = ret & ADP5588_DEVICE_ID_MASK; - for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + for (i = 0, ret = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); - + ret |= adp5588_gpio_write(client, GPIO_INT_EN1 + i, 0); if (ret) goto err; } + if (pdata->irq_base) { + if (WA_DELAYED_READOUT_REVID(revid)) { + dev_warn(&client->dev, "GPIO int not supported\n"); + } else { + ret = adp5588_irq_setup(dev); + if (ret) + goto err; + } + } + ret = gpiochip_add(&dev->gpio_chip); if (ret) - goto err; + goto err_irq; - dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + dev_info(&client->dev, "gpios %d..%d (IRQ Base %d) on a %s Rev. 
%d\n", gc->base, gc->base + gc->ngpio - 1, - client->name, revid); + pdata->irq_base, client->name, revid); if (pdata->setup) { ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); @@ -199,8 +428,11 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, } i2c_set_clientdata(client, dev); + return 0; +err_irq: + adp5588_irq_teardown(dev); err: kfree(dev); return ret; @@ -222,6 +454,9 @@ static int __devexit adp5588_gpio_remove(struct i2c_client *client) } } + if (dev->irq_base) + free_irq(dev->client->irq, dev); + ret = gpiochip_remove(&dev->gpio_chip); if (ret) { dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 269181b8f623..531376b77773 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -74,6 +74,20 @@ #define ADP5588_DEVICE_ID_MASK 0xF + /* Configuration Register1 */ +#define ADP5588_AUTO_INC (1 << 7) +#define ADP5588_GPIEM_CFG (1 << 6) +#define ADP5588_INT_CFG (1 << 4) +#define ADP5588_GPI_IEN (1 << 1) + +/* Interrupt Status Register */ +#define ADP5588_GPI_INT (1 << 1) +#define ADP5588_KE_INT (1 << 0) + +#define ADP5588_MAXGPIO 18 +#define ADP5588_BANK(offs) ((offs) >> 3) +#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) + /* Put one of these structures in i2c_board_info platform_data */ #define ADP5588_KEYMAPSIZE 80 @@ -128,6 +142,7 @@ struct adp5588_kpad_platform_data { struct adp5588_gpio_platform_data { unsigned gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, -- cgit v1.2.3 From 9ef8c8c51a7d76bae73e0259c356b24533b6b7c0 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: gpio: adp5588-gpio: gpio_start must be signed Common code interprets this as a signed value (a negative value is used to request dynamic ID allocation), so make sure the platform data has proper types to support that. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 531376b77773..bec05ed21766 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -141,9 +141,9 @@ struct adp5588_kpad_platform_data { }; struct adp5588_gpio_platform_data { - unsigned gpio_start; /* GPIO Chip base # */ - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, void *context); -- cgit v1.2.3 From dc5ae4f2f58cfa98b67d2be379fc99080a8967af Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: gpio: adp5588-gpio: add i2c forward declaration Some ADP5588 functions take a pointer to an i2c_client, but if the i2c header doesn't happen to be included first, we hit the standard "struct declared inside parameter list" warnings from gcc. So add a simple forward decl of the i2c_client struct. 
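For illustration, a minimal sketch of the pattern (a hypothetical header, not part of this patch): because such a header only ever passes pointers to struct i2c_client around, an incomplete type is enough and no extra include is needed:

struct i2c_client;	/* forward declaration: only pointers are used below */

struct example_platform_data {
	int gpio_start;
	/* the compiler never needs the full definition of i2c_client
	 * here, since only a pointer to it crosses this interface */
	int (*setup)(struct i2c_client *client, int gpio,
		     unsigned ngpio, void *context);
};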
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index bec05ed21766..3c5d6b6e765c 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -140,6 +140,8 @@ struct adp5588_kpad_platform_data { const struct adp5588_gpio_platform_data *gpio_data; }; +struct i2c_client; /* forward declaration */ + struct adp5588_gpio_platform_data { int gpio_start; /* GPIO Chip base # */ unsigned irq_base; /* interrupt base # */ -- cgit v1.2.3 From f11b478d461b7113eb4603b3914aaf15b7788e87 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Oct 2010 15:33:28 -0700 Subject: fbmem: fix fb_read, fb_write unaligned accesses fb_{read,write} access the framebuffer using lots of fb_{read,write}l's but don't check that the file position is aligned which can cause problems on some architectures which do not support unaligned accesses. Since the operations are essentially memcpy_{from,to}io, new fb_memcpy_{from,to}fb macros have been defined and these are used instead. For Sparc, fb_{read,write} macros use sbus_{read,write}, so this defines new sbus_memcpy_{from,to}io functions the same as memcpy_{from,to}io but using sbus_{read,write}b instead of {read,write}b. Signed-off-by: James Hogan Acked-by: David S. Miller Acked-by: Florian Tobias Schandinat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/io_32.h | 31 ++++++++++++++++++++++++++++ arch/sparc/include/asm/io_64.h | 31 ++++++++++++++++++++++++++++ drivers/video/fbmem.c | 46 +++++++++++++----------------------------- include/linux/fb.h | 6 ++++++ 4 files changed, 82 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h index 2889574608db..c2ced21c9dc1 100644 --- a/arch/sparc/include/asm/io_32.h +++ b/arch/sparc/include/asm/io_32.h @@ -207,6 +207,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) +{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -221,6 +236,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 9517d063c79c..9c8965415f0a 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -418,6 +418,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) 
+{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -432,6 +447,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index a43442341ddd..0e6aa3d96a42 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -697,9 +697,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *dst; - u32 __iomem *src; - int c, i, cnt = 0, err = 0; + u8 *buffer, *dst; + u8 __iomem *src; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || ! info->screen_base) @@ -730,7 +730,7 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - src = (u32 __iomem *) (info->screen_base + p); + src = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -738,17 +738,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) while (count) { c = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--; ) - *dst++ = fb_readl(src++); - if (c & 3) { - u8 *dst8 = (u8 *) dst; - u8 __iomem *src8 = (u8 __iomem *) src; - - for (i = c & 3; i--;) - *dst8++ = fb_readb(src8++); - - src = (u32 __iomem *) src8; - } + fb_memcpy_fromfb(dst, src, c); + dst += c; + src += c; if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -772,9 +764,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *src; - u32 __iomem *dst; - int c, i, cnt = 0, err = 0; + u8 *buffer, *src; + u8 __iomem *dst; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || !info->screen_base) @@ -811,7 +803,7 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - dst = (u32 __iomem *) (info->screen_base + p); + dst = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -825,19 +817,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) break; } - for (i = c >> 2; i--; ) - fb_writel(*src++, dst++); - - if (c & 3) { - u8 *src8 = (u8 *) src; - u8 __iomem *dst8 = (u8 __iomem *) dst; - - for (i = c & 3; i--; ) - fb_writeb(*src8++, dst8++); - - dst = (u32 __iomem *) dst8; - } - + fb_memcpy_tofb(dst, src, c); + dst += c; + src += c; *ppos += c; buf += c; cnt += c; diff --git a/include/linux/fb.h b/include/linux/fb.h index f0268deca658..7fca3dc4e475 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -931,6 +931,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel sbus_writel #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io +#define fb_memcpy_fromfb sbus_memcpy_fromio +#define fb_memcpy_tofb sbus_memcpy_toio #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) @@ -943,6 +945,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel __raw_writel #define fb_writeq __raw_writeq #define fb_memset memset_io +#define fb_memcpy_fromfb memcpy_fromio +#define fb_memcpy_tofb memcpy_toio #else @@ -955,6 +959,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel(b,addr) (*(volatile u32 *) (addr) = (b)) #define fb_writeq(b,addr) (*(volatile u64 *) (addr) = (b)) #define fb_memset memset +#define fb_memcpy_fromfb memcpy +#define fb_memcpy_tofb memcpy #endif -- cgit v1.2.3 From 97978e6d1f2da0073416870410459694fbdbfd9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:35 -0700 Subject: cgroup: add clone_children control file The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process who did 'unshare' or the child of 'clone'. This cgroup is tied with the namespace because it prevents a process to escape the control group and use the post_clone callback, so the child cgroup inherits the values of the parent cgroup. 
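As a rough sketch of the inheritance mechanism described above (simplified, with a hypothetical copy helper; cpuset is the one subsystem that really implements the callback):

/*
 * Sketch only: a post_clone implementation copies per-subsystem
 * state from the parent so the new child starts out as a clone.
 */
static void example_post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	struct cgroup *parent = cgrp->parent;

	if (!parent)
		return;
	/* copy per-subsystem defaults (masks, limits, ...) */
	example_copy_state(cgrp, parent);	/* hypothetical helper */
}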
Unfortunately, the more we use this cgroup, the more problems we face with it:

(1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone return -EEXIST

(2) cgroup creation is out of control, because an application may create several namespaces, and the system will automatically create several cgroups behind its back and leave them on the cgroupfs (eg. a vrf based on the network namespace).

(3) the combination of (1) and (2) forces an administrator to regularly check and clean up these cgroups.

This patchset removes the ns_cgroup by adding a new flag to the cgroup and a cgroupfs mount option. The flag enables copying the parent cgroup's values when a child cgroup is created, so we can then safely remove the ns_cgroup, as the flag provides the same behaviour. Tasks now have to be placed in a cgroup manually, which is consistent with the cgroup framework.

This patch was sent as an answer to a previous thread around the ns_cgroup:

https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html

It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying whether a child cgroup should be a clone of the parent cgroup. The default value is 'false'. When the flag is set, the child cgroup calls the post_clone callback of each subsystem that provides one. At present, cpuset is the only subsystem that implements post_clone.

The option can be set at mount time by specifying the 'clone_children' mount option.

Signed-off-by: Daniel Lezcano
Signed-off-by: Serge E. Hallyn
Cc: Eric W. Biederman
Acked-by: Paul Menage
Reviewed-by: Li Zefan
Cc: Jamal Hadi Salim
Cc: Matt Helsley
Acked-by: Balbir Singh
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 Documentation/cgroups/cgroups.txt | 14 ++++++++++++--
 include/linux/cgroup.h | 4 ++++
 kernel/cgroup.c | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b34823ff1646..190018b0c649 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -18,7 +18,8 @@ CONTENTS:
   1.2 Why are cgroups needed ?
   1.3 How are cgroups implemented ?
   1.4 What does notify_on_release do ?
-  1.5 How do I use cgroups ?
+  1.5 What does clone_children do ?
+  1.6 How do I use cgroups ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
   2.2 Attaching processes
@@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled
 value of their parents notify_on_release setting. The default value of
 a cgroup hierarchy's release_agent path is empty.

-1.5 How do I use cgroups ?
+1.5 What does clone_children do ?
+---------------------------------
+
+If the clone_children flag is enabled (1) in a cgroup, then all
+cgroups created beneath will call the post_clone callbacks for each
+subsystem of the newly created cgroup. Usually when this callback is
+implemented for a subsystem, it copies the values of the parent
+subsystem, this is the case for the cpuset.
+
+1.6 How do I use cgroups ?
 --------------------------

 To start a new job that is to be contained within a cgroup, using
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 709dfb901d11..ed4ba111bc8d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,10 @@ enum {
	 * A thread in rmdir() is wating for this cgroup.
*/ CGRP_WAIT_ON_RMDIR, + /* + * Clone cgroup values when creating a new child cgroup + */ + CGRP_CLONE_CHILDREN, }; /* which pidlist file are we talking about? */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9270d532ec3c..4b218a46ddd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); + } else if (!strcmp(token, "clone_children")) { + opts->clone_children = true; } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -3173,6 +3185,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3203,6 +3232,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. 
*/
+		if (clone_children(parent) && ss->post_clone)
+			ss->post_clone(ss, cgrp);
 	}

 	cgroup_lock_hierarchy(root);
-- cgit v1.2.3

From 4abf986960ecda6a87fc2f795aacf888a2f0127e Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 27 Oct 2010 15:33:45 -0700
Subject: ptrace: change signature of sys_ptrace() and friends

Since the userspace API of the ptrace syscall defines @addr and @data as void pointers, it is more appropriate to define them as unsigned long in the kernel; the related functions are changed accordingly. 'unsigned long' is typically used elsewhere in the kernel as an opaque data type, and using it here cleans up a lot of sparse warnings.

Suggested-by: Arnd Bergmann
Signed-off-by: Namhyung Kim
Acked-by: Arnd Bergmann
Acked-by: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/ptrace.h | 9 ++++++---
 include/linux/syscalls.h | 3 ++-
 kernel/ptrace.c | 16 ++++++++++------
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4272521e29e9..67a4cd77c352 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -108,7 +108,8 @@ extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
-extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
+extern int ptrace_request(struct task_struct *child, long request,
+			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
			   struct task_struct *new_parent);
@@ -132,8 +133,10 @@ static inline void ptrace_unlink(struct task_struct *child)
 	__ptrace_unlink(child);
 }

-int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data);
-int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
+int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
+			    unsigned long data);
+int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
+			    unsigned long data);

 /**
  * task_ptrace - return %PT_* flags that apply to a task
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e6319d18a55d..cacc27a0e285 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -701,7 +701,8 @@ asmlinkage long sys_nfsservctl(int cmd,
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
 asmlinkage long sys_uselib(const char __user *library);
 asmlinkage long sys_ni_syscall(void);
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data);
+asmlinkage long sys_ptrace(long request, long pid, unsigned long addr,
+			   unsigned long data);

 asmlinkage long sys_add_key(const char __user *_type,
			    const char __user *_description,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4afd9b86cc0b..06981a8b271b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -404,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 	return copied;
 }

-static int ptrace_setoptions(struct task_struct *child, long data)
+static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 {
 	child->ptrace &= ~PT_TRACE_MASK;

@@ -483,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 #define is_sysemu_singlestep(request)	0
 #endif

-static int ptrace_resume(struct task_struct *child, long request,
long data) +static int ptrace_resume(struct task_struct *child, long request, + unsigned long data) { if (!valid_signal(data)) return -EIO; @@ -560,7 +561,7 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, #endif int ptrace_request(struct task_struct *child, long request, - long addr, long data) + unsigned long addr, unsigned long data) { int ret = -EIO; siginfo_t siginfo; @@ -693,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, + unsigned long, data) { struct task_struct *child; long ret; @@ -734,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) return ret; } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { unsigned long tmp; int copied; @@ -745,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) return put_user(tmp, (unsigned long __user *)data); } -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { int copied; -- cgit v1.2.3 From 9b05a69e0534ec70bc94921936ffa05b330507cb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:47 -0700 Subject: ptrace: change signature of arch_ptrace() Fix up the arguments to arch_ptrace() to take account of the fact that @addr and @data are now unsigned long rather than long as of a preceding patch in this series. Signed-off-by: Namhyung Kim Cc: Acked-by: Roland McGrath Acked-by: David Howells Acked-by: Geert Uytterhoeven Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 7 ++++--- arch/arm/kernel/ptrace.c | 3 ++- arch/avr32/kernel/ptrace.c | 3 ++- arch/blackfin/kernel/ptrace.c | 3 ++- arch/cris/arch-v10/kernel/ptrace.c | 7 ++++--- arch/cris/arch-v32/kernel/ptrace.c | 3 ++- arch/frv/kernel/ptrace.c | 3 ++- arch/h8300/kernel/ptrace.c | 7 ++++--- arch/ia64/kernel/ptrace.c | 3 ++- arch/m32r/kernel/ptrace.c | 3 ++- arch/m68k/kernel/ptrace.c | 9 +++++---- arch/m68knommu/kernel/ptrace.c | 7 ++++--- arch/microblaze/kernel/ptrace.c | 3 ++- arch/mips/kernel/ptrace.c | 3 ++- arch/mn10300/kernel/ptrace.c | 3 ++- arch/parisc/kernel/ptrace.c | 11 ++++++----- arch/powerpc/kernel/ptrace.c | 15 ++++++++------- arch/s390/kernel/ptrace.c | 3 ++- arch/score/kernel/ptrace.c | 3 ++- arch/sh/kernel/ptrace_32.c | 21 +++++++++++---------- arch/sh/kernel/ptrace_64.c | 6 ++++-- arch/sparc/kernel/ptrace_32.c | 3 ++- arch/sparc/kernel/ptrace_64.c | 7 ++++--- arch/tile/kernel/ptrace.c | 9 ++++++--- arch/um/kernel/ptrace.c | 5 +++-- arch/um/sys-i386/ptrace.c | 4 ++-- arch/um/sys-x86_64/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 7 ++++--- arch/xtensa/kernel/ptrace.c | 3 ++- include/linux/ptrace.h | 3 ++- 30 files changed, 101 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index baa903602f6a..e2af5eb59bb4 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -269,7 +269,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; size_t copied; @@ -292,7 +293,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_PEEKUSR: force_successful_syscall_return(); ret = get_reg(child, addr); - DBG(DBG_MEM, ("peek $%ld->%#lx\n", addr, ret)); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); break; /* When I and D space are separate, this will have to be fixed. 
*/ @@ -302,7 +303,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_POKEUSR: /* write the specified register */ - DBG(DBG_MEM, ("poke $%ld<-%#lx\n", addr, data)); + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); ret = put_reg(child, addr, data); break; default: diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index e0cb6370ed14..9bca6165459e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1075,7 +1075,8 @@ out: } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index 5e73c25f8f85..ecea9b6bfab4 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -146,7 +146,8 @@ static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs) return ret; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index b35839354130..8e3083ccd88a 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -240,7 +240,8 @@ void user_disable_single_step(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SINGLESTEP); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index e70c804e9377..d411e024e05f 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -76,7 +76,8 @@ ptrace_disable(struct task_struct *child) * (in user space) where the result of the ptrace call is written (instead of * being returned). 
*/ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -141,7 +142,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -165,7 +166,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f4ebd1e7d0f5..3e058a121753 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -126,7 +126,8 @@ ptrace_disable(struct task_struct *child) } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index fac028936a04..e9dbfad93589 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -254,7 +254,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int ret; diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index df114122ebdf..ef1aa0b8b20f 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -50,7 +50,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -120,7 +121,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -135,7 +136,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } h8300_put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 7c7909f9bc93..8848f43d819e 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1177,7 +1177,8 @@ ptrace_disable (struct task_struct *child) } long -arch_ptrace (struct task_struct *child, long request, long addr, long data) +arch_ptrace (struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PTRACE_PEEKTEXT: diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 0021ade4cba8..5e00e0a41fff 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -622,7 +622,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 616e59752c29..583f59fcc363 100644 --- a/arch/m68k/kernel/ptrace.c +++ 
b/arch/m68k/kernel/ptrace.c @@ -156,7 +156,8 @@ void user_disable_single_step(struct task_struct *child) singlestep_disable(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int i, ret = 0; @@ -200,7 +201,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) * into internal fpu reg representation */ if (FPU_IS_EMU && (addr < 45) && !(addr % 3)) { - data = (unsigned long)data << 15; + data <<= 15; data = (data & 0xffff0000) | ((data & 0x0000ffff) >> 1); } @@ -215,7 +216,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = put_user(tmp, (unsigned long *)data); if (ret) break; - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -229,7 +230,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~SR_MASK; } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 6fe7c38cd556..7dbb08f5534e 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -112,7 +112,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -184,7 +185,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -204,7 +205,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~(SR_MASK << 16); } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index dc03ffc8174a..3544bc157406 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -73,7 +73,8 @@ static microblaze_reg_t *reg_save_addr(unsigned reg_offs, return (microblaze_reg_t *)((char *)regs + reg_offs); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int rval; unsigned long val = 0; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c8777333e198..95c3ae8b198c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -255,7 +255,8 @@ int ptrace_set_watch_regs(struct task_struct *child, return 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index cf847dabc1bd..ec4b41439e99 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -295,7 +295,8 @@ void ptrace_disable(struct task_struct *child) /* * handle the arch-specific side of process tracing */ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int 
ret; diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index c4f49e45129d..03920db4af45 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -110,7 +110,8 @@ void user_enable_block_step(struct task_struct *task) pa_psw(task)->l = 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; long ret = -EIO; @@ -120,8 +121,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* Read the word at location addr in the USER area. For ptraced processes, the kernel saves all regs on a syscall. */ case PTRACE_PEEKUSR: - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); ret = put_user(tmp, (unsigned long *) data); @@ -151,8 +152,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; if ((addr >= PT_GR1 && addr <= PT_GR31) || addr == PT_IAOQ0 || addr == PT_IAOQ1 || diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 286d9783d93f..136763568a7b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1406,8 +1406,8 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version */ -static long arch_ptrace_old(struct task_struct *child, long request, long addr, - long data) +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. 
*/ @@ -1434,7 +1434,8 @@ static long arch_ptrace_old(struct task_struct *child, long request, long addr, return -EPERM; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; @@ -1446,11 +1447,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1474,11 +1475,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 83339d33c4b1..019bb714db49 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -343,7 +343,8 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return __poke_user(child, addr, data); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { ptrace_area parea; int copied, ret; diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 174c6422b096..894dcbf72099 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -325,7 +325,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (void __user *)data; diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 2cd42b58cb20..34bf03745e86 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -365,7 +365,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { struct user * dummy = NULL; unsigned long __user *datap = (unsigned long __user *)data; @@ -383,17 +384,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) tmp = get_stack_long(child, addr); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { if (!tsk_used_math(child)) { - if (addr == (long)&dummy->fpu.fpscr) + if (addr == (unsigned long)&dummy->fpu.fpscr) tmp = FPSCR_INIT; else tmp = 0; } else - tmp = ((long *)child->thread.xstate) + tmp = ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2]; - } else if (addr == (long) &dummy->u_fpvalid) + } else if (addr == (unsigned long) &dummy->u_fpvalid) tmp = !!tsk_used_math(child); else if (addr == PT_TEXT_ADDR) tmp = child->mm->start_code; @@ -417,13 
+418,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) ret = put_stack_long(child, addr, data); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { set_stopped_child_used_math(child); - ((long *)child->thread.xstate) + ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2] = data; ret = 0; - } else if (addr == (long) &dummy->u_fpvalid) { + } else if (addr == (unsigned long) &dummy->u_fpvalid) { conditional_stopped_child_used_math(data, child); ret = 0; } diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index e0fb065914aa..4840716c196a 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -383,7 +383,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh64_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -471,7 +472,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -asmlinkage int sh64_ptrace(long request, long pid, long addr, long data) +asmlinkage int sh64_ptrace(long request, long pid, + unsigned long addr, unsigned long data) { #define WPC_DBRMODE 0x0d104008 static unsigned long first_call; diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index e608f397e11f..e08ba4a46acd 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -323,7 +323,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sparc32_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4]; const struct user_regset_view *view; diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index aa90da08bf61..d9db5a4dfef9 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -969,7 +969,8 @@ struct fps { unsigned long fsr; }; -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { const struct user_regset_view *view = task_user_regset_view(current); unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4]; @@ -977,8 +978,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) struct fps __user *fps; int ret; - pregs = (struct pt_regs __user *) (unsigned long) addr; - fps = (struct fps __user *) (unsigned long) addr; + pregs = (struct pt_regs __user *) addr; + fps = (struct fps __user *) addr; switch (request) { case PTRACE_PEEKUSR: diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 5b20c2874d51..f634729211d1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -45,7 +45,8 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user 
__force *)data; unsigned long tmp; @@ -98,7 +99,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __put_user(childregs[i], &datap[i]); if (ret != 0) break; @@ -109,7 +111,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __get_user(childregs[i], &datap[i]); if (ret != 0) break; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index e0510496596c..963d82bdec06 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -42,10 +42,11 @@ void ptrace_disable(struct task_struct *child) extern int peek_user(struct task_struct * child, long addr, long data); extern int poke_user(struct task_struct * child, long addr, long data); -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int i, ret; - unsigned long __user *p = (void __user *)(unsigned long)data; + unsigned long __user *p = (void __user *)data; switch (request) { /* read word at location addr. */ diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index c9b176534d65..d23b2d3ea384 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -203,8 +203,8 @@ int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) (unsigned long *) &fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { return -EIO; } diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index f3458d7d1c5a..67e63680df28 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -175,8 +175,8 @@ int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return restore_fp_registers(userspace_pid[cpu], fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EIO; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 70c4872cd8aa..1a7ca045920d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child) static const struct user_regset_view user_x86_32_view; /* Initialized below. 
*/
 #endif

-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
 {
 	int ret;
 	unsigned long __user *datap = (unsigned long __user *)data;

@@ -888,14 +889,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)

 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 	case PTRACE_GET_THREAD_AREA:
-		if (addr < 0)
+		if ((int) addr < 0)
 			return -EIO;
 		ret = do_get_thread_area(child, addr,
					 (struct user_desc __user *) data);
 		break;

 	case PTRACE_SET_THREAD_AREA:
-		if (addr < 0)
+		if ((int) addr < 0)
 			return -EIO;
 		ret = do_set_thread_area(child, addr,
					 (struct user_desc __user *) data, 0);
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 9d4e1ceb3f09..af9ba80f254c 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -256,7 +256,8 @@ int ptrace_pokeusr(struct task_struct *child, long regno, long val)
 	return 0;
 }

-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
 {
 	int ret = -EPERM;

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67a4cd77c352..092a04f874a8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -100,7 +100,8 @@
 #include <linux/sched.h>	/* For struct task_struct. */

-extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
+extern long arch_ptrace(struct task_struct *child, long request,
+			unsigned long addr, unsigned long data);
 extern int ptrace_traceme(void);
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
-- cgit v1.2.3

From b8ed374e202e23caaf9bd77dcadc9de6447faaa8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 27 Oct 2010 15:34:06 -0700
Subject: signals: annotate lock_task_sighand()

lock_task_sighand() grabs sighand->siglock when it returns non-NULL, but unlock_task_sighand() releases the lock unconditionally. This leads sparse to complain about a lock context imbalance. Rename lock_task_sighand() and wrap it with the __cond_lock() macro to make sparse happy.
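The pattern, sketched for a hypothetical try-lock helper (the actual lock_task_sighand() wrapper is in the diff below): under sparse, __cond_lock() tells the checker that the lock is acquired exactly when the wrapped expression evaluates to non-NULL, so the conditional acquire and the unconditional release balance out:

/* Sketch, assuming a struct foo with an embedded lock.  The
 * out-of-line function takes f->lock only on success and returns
 * f or NULL. */
struct foo *__foo_trylock(struct foo *f);

#define foo_trylock(f)						\
({	struct foo *__ret;					\
	__cond_lock(&(f)->lock, (__ret = __foo_trylock(f)));	\
	__ret;							\
})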
Suggested-by: Eric Dumazet Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/signal.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 393ce94e54b7..3ff5c8519abd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2236,9 +2236,16 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); +#define lock_task_sighand(tsk, flags) \ +({ struct sighand_struct *__ss; \ + __cond_lock(&(tsk)->sighand->siglock, \ + (__ss = __lock_task_sighand(tsk, flags))); \ + __ss; \ +}) \ + static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) { diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..e921409b85a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; -- cgit v1.2.3 From 9b1bf12d5d51bca178dea21b04a0805e29d60cf1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: signals: move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov pointed out we have to prevent multiple-threads-inside-exec itself and we can reuse ->cred_guard_mutex for it. Yes, concurrent execve() has no worth. Let's move ->cred_guard_mutex from task_struct to signal_struct. It naturally prevent multiple-threads-inside-exec. Signed-off-by: KOSAKI Motohiro Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- fs/proc/base.c | 8 ++++---- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 7 ++++--- include/linux/tracehook.h | 2 +- kernel/cred.c | 4 +--- kernel/fork.c | 2 ++ kernel/ptrace.c | 4 ++-- 8 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a1..9722909c4d88 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,14 +1083,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1098,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1119,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. 
*/ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c8465..f3d02ca461ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6ba..1f8c06ce0fa6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,6 +29,8 @@ extern struct fs_struct init_fs; .running = 0, \ .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } extern struct nsproxy init_nsproxy; @@ -145,8 +147,6 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ff5c8519abd..be7adb7588e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,10 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -1305,9 +1309,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 10db0102a890..3a2e66d88a32 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -150,7 +150,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. 
* - * @task->cred_guard_mutex is held by the caller through the do_execve(). + * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). */ static inline int tracehook_unsafe_exec(struct task_struct *task) { diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe7..6a1aa004e376 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_guard_mutex + * - The caller must hold ->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_guard_mutex); - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/fork.c b/kernel/fork.c index e87aaaaf5131..3b159c5991b7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + mutex_init(&sig->cred_guard_mutex); + return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ea7ce0215cd1..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) * under ptrace. */ retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->cred_guard_mutex)) + if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); @@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; } -- cgit v1.2.3 From f2c66cd8eeddedb440f33bc0f5cec1ed7ae376cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:13 -0700 Subject: /proc/stat: scalability of irq num per cpu /proc/stat shows the total number of all interrupts to each cpu. But when the number of IRQs is very large, it takes a very long time, and 'cat /proc/stat' can take more than 10 seconds. This is because the sum of all irq events is computed every time /proc/stat is read. This patch adds a per-cpu "sum of all irq" counter and reduces read costs. The cost of reading /proc/stat is important because it's used by major applications such as 'top', 'ps', 'w', etc.
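To make the bookkeeping concrete, here is a minimal user-space sketch of the idea (an editorial illustration, not kernel code; all names and sizes are invented): each interrupt bumps a per-cpu running total alongside the detailed per-irq count, so a reader touches one counter per cpu instead of one counter per (cpu, irq) pair.

	#include <stdio.h>

	#define NCPUS 4
	#define NIRQS 8

	static unsigned int irq_count[NCPUS][NIRQS];	/* per-cpu, per-irq counts */
	static unsigned long irqs_sum[NCPUS];		/* per-cpu running totals */

	static void account_irq(int cpu, int irq)
	{
		irq_count[cpu][irq]++;	/* still kept for the per-irq columns */
		irqs_sum[cpu]++;	/* cheap total for the "intr" summary */
	}

	/* Before: every read walks the whole NCPUS x NIRQS table. */
	static unsigned long total_irqs_slow(void)
	{
		unsigned long sum = 0;
		int cpu, irq;

		for (cpu = 0; cpu < NCPUS; cpu++)
			for (irq = 0; irq < NIRQS; irq++)
				sum += irq_count[cpu][irq];
		return sum;
	}

	/* After: one counter per cpu, independent of the number of irqs. */
	static unsigned long total_irqs_fast(void)
	{
		unsigned long sum = 0;
		int cpu;

		for (cpu = 0; cpu < NCPUS; cpu++)
			sum += irqs_sum[cpu];
		return sum;
	}

	int main(void)
	{
		account_irq(0, 3);
		account_irq(1, 5);
		printf("slow=%lu fast=%lu\n", total_irqs_slow(), total_irqs_fast());
		return 0;
	}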
A test on a machine (4096 cpus, 256 nodes, 4592 irqs) shows %time cat /proc/stat > /dev/null Before Patch: 12.627 sec After Patch: 2.459 sec Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 4 +--- include/linux/kernel_stat.h | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf31b03fc275..b80c620565bf 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -52,9 +52,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); guest_nice = cputime64_add(guest_nice, kstat_cpu(i).cpustat.guest_nice); - for_each_irq_nr(j) { - sum += kstat_irqs_cpu(j, i); - } + sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c059044bc6dc..8b9b89085530 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -33,6 +33,7 @@ struct kernel_stat { #ifndef CONFIG_GENERIC_HARDIRQS unsigned int irqs[NR_IRQS]; #endif + unsigned long irqs_sum; unsigned int softirqs[NR_SOFTIRQS]; }; @@ -54,6 +55,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) { kstat_this_cpu.irqs[irq]++; + kstat_this_cpu.irqs_sum++; } static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) @@ -65,8 +67,9 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]++) +#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\ + ((DESC)->kstat_irqs[smp_processor_id()]++);\ + kstat_this_cpu.irqs_sum++; } while (0) #endif @@ -94,6 +97,13 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +/* + * Number of interrupts per cpu, since bootup + */ +static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) +{ + return kstat_cpu(cpu).irqs_sum; +} /* * Lock/unlock the current runqueue - to extract task statistics: -- cgit v1.2.3 From 478735e38887077ac77a9756121b6ce0cb956e2f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:15 -0700 Subject: /proc/stat: fix scalability of irq sum of all cpu In /proc/stat, the per-IRQ event counts are shown by summing each irq's events over all cpus. But we can make use of kstat_irqs(), which does the same calculation. If !CONFIG_GENERIC_HARDIRQS, it's not a big cost. (Both the number of cpus and the number of irqs are small.) If a system is very big and CONFIG_GENERIC_HARDIRQS is set, it does for_each_irq() for_each_cpu() - look up a radix tree - read desc->irq_stat[cpu] This is not efficient. This patch adds kstat_irqs() for CONFIG_GENERIC_HARDIRQS and changes the calculation to for_each_irq() look up radix tree for_each_cpu() - read desc->irq_stat[cpu] This reduces the cost.
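A minimal sketch of that loop inversion (again an editorial illustration under assumptions: find_desc() stands in for the kernel's radix-tree lookup, and the table sizes are shrunk so the program runs):

	#include <stdio.h>

	#define NR_CPUS 8	/* shrunk from 4096 for the sketch */
	#define NR_IRQS 16	/* shrunk from 4592 */

	struct irq_desc {
		unsigned int kstat_irqs[NR_CPUS];
	};

	static struct irq_desc descs[NR_IRQS];

	/* Stand-in for irq_to_desc(): pretend this is a costly radix-tree walk. */
	static struct irq_desc *find_desc(unsigned int irq)
	{
		return irq < NR_IRQS ? &descs[irq] : NULL;
	}

	/* Before: the lookup runs once per (irq, cpu) pair. */
	static unsigned long sum_lookup_per_pair(void)
	{
		unsigned long sum = 0;
		unsigned int irq;
		int cpu;

		for (irq = 0; irq < NR_IRQS; irq++)
			for (cpu = 0; cpu < NR_CPUS; cpu++) {
				struct irq_desc *desc = find_desc(irq);

				if (desc)
					sum += desc->kstat_irqs[cpu];
			}
		return sum;
	}

	/* After: one lookup per irq, then a plain walk over the per-cpu array. */
	static unsigned long sum_lookup_per_irq(void)
	{
		unsigned long sum = 0;
		unsigned int irq;
		int cpu;

		for (irq = 0; irq < NR_IRQS; irq++) {
			struct irq_desc *desc = find_desc(irq);

			if (!desc)
				continue;
			for (cpu = 0; cpu < NR_CPUS; cpu++)
				sum += desc->kstat_irqs[cpu];
		}
		return sum;
	}

	int main(void)
	{
		descs[2].kstat_irqs[1] = 7;
		printf("%lu %lu\n", sum_lookup_per_pair(), sum_lookup_per_irq());
		return 0;
	}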
A test on a (4096 cpus, 256 nodes, 4592 irqs) host (by Jack Steiner): %time cat /proc/stat > /dev/null Before Patch: 2.459 sec After Patch: 0.561 sec [akpm@linux-foundation.org: unexport kstat_irqs, coding-style tweaks] [akpm@linux-foundation.org: fix unused variable 'per_irq_sum'] Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 10 ++-------- include/linux/kernel_stat.h | 4 ++++ kernel/irq/irqdesc.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b80c620565bf..e15a19c93bae 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; - unsigned int per_irq_sum; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -108,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } + for_each_irq_nr(j) + seq_printf(p, " %u", kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8b9b89085530..ad54c846911b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -86,6 +86,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ +#ifndef CONFIG_GENERIC_HARDIRQS static inline unsigned int kstat_irqs(unsigned int irq) { unsigned int sum = 0; @@ -96,6 +97,9 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +#else +extern unsigned int kstat_irqs(unsigned int irq); +#endif /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) struct irq_desc *desc = irq_to_desc(irq); return desc ? desc->kstat_irqs[cpu] : 0; } + +#ifdef CONFIG_GENERIC_HARDIRQS +unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + int cpu; + int sum = 0; + + if (!desc) + return 0; + for_each_possible_cpu(cpu) + sum += desc->kstat_irqs[cpu]; + return sum; +} +#endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3 From 9807224f1dce5fb746ee33fb67ea2e38dafe3e9c Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Wed, 27 Oct 2010 15:34:22 -0700 Subject: drivers/char/synclink_gt.c: add extended sync feature Add support for the hardware's extended byte synchronous mode feature.
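As a rough orientation, a hypothetical user-space sketch of driving the new interface (the device node, sync pattern, and error handling are invented; MGSL_IOCSXSYNC, MGSL_IOCSXCTRL, and the xctrl bit layout are the ones added by this patch, and the operating mode would additionally be selected by setting params.mode = MGSL_MODE_XSYNC through the pre-existing MGSL_IOCSPARAMS ioctl):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/synclink.h>

	int main(void)
	{
		/* Hypothetical device node; adjust for the actual adapter. */
		int fd = open("/dev/ttySLG0", O_RDWR | O_NOCTTY);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/*
		 * Load a 2-byte sync pattern, 0x7e96; the most significant
		 * byte of the pattern is sent/detected first.
		 */
		if (ioctl(fd, MGSL_IOCSXSYNC, 0x7e96) < 0)
			perror("MGSL_IOCSXSYNC");

		/*
		 * xctrl[18:17] = 01 selects a 2-byte pattern; terminal count
		 * enable (bit 16) and the count field [15:0] stay zero.
		 */
		if (ioctl(fd, MGSL_IOCSXCTRL, 1 << 17) < 0)
			perror("MGSL_IOCSXCTRL");

		return 0;
	}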
Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 111 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/synclink.h | 5 ++ 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 9f7fc71474b4..d01fffeac951 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -301,6 +301,8 @@ struct slgt_info { unsigned int rx_pio; unsigned int if_mode; unsigned int base_clock; + unsigned int xsync; + unsigned int xctrl; /* device status */ @@ -405,6 +407,8 @@ static MGSL_PARAMS default_params = { #define TDCSR 0x94 /* tx DMA control/status */ #define RDDAR 0x98 /* rx DMA descriptor address */ #define TDDAR 0x9c /* tx DMA descriptor address */ +#define XSR 0x40 /* extended sync pattern */ +#define XCR 0x44 /* extended control */ #define RXIDLE BIT14 #define RXBREAK BIT14 @@ -517,6 +521,10 @@ static int set_interface(struct slgt_info *info, int if_mode); static int set_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int get_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); +static int get_xsync(struct slgt_info *info, int __user *if_mode); +static int set_xsync(struct slgt_info *info, int if_mode); +static int get_xctrl(struct slgt_info *info, int __user *if_mode); +static int set_xctrl(struct slgt_info *info, int if_mode); /* * driver functions @@ -1056,6 +1064,14 @@ static int ioctl(struct tty_struct *tty, struct file *file, return get_gpio(info, argp); case MGSL_IOCWAITGPIO: return wait_gpio(info, argp); + case MGSL_IOCGXSYNC: + return get_xsync(info, argp); + case MGSL_IOCSXSYNC: + return set_xsync(info, (int)arg); + case MGSL_IOCGXCTRL: + return get_xctrl(info, argp); + case MGSL_IOCSXCTRL: + return set_xctrl(info, (int)arg); } mutex_lock(&info->port.mutex); switch (cmd) { @@ -1213,12 +1229,16 @@ static long slgt_compat_ioctl(struct tty_struct *tty, struct file *file, case MGSL_IOCSGPIO: case MGSL_IOCGGPIO: case MGSL_IOCWAITGPIO: + case MGSL_IOCGXSYNC: + case MGSL_IOCGXCTRL: case MGSL_IOCSTXIDLE: case MGSL_IOCTXENABLE: case MGSL_IOCRXENABLE: case MGSL_IOCTXABORT: case TIOCMIWAIT: case MGSL_IOCSIF: + case MGSL_IOCSXSYNC: + case MGSL_IOCSXCTRL: rc = ioctl(tty, file, cmd, arg); break; } @@ -1961,6 +1981,7 @@ static void bh_handler(struct work_struct *work) case MGSL_MODE_RAW: case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: while(rx_get_buf(info)); break; } @@ -2889,6 +2910,69 @@ static int set_interface(struct slgt_info *info, int if_mode) return 0; } +static int get_xsync(struct slgt_info *info, int __user *xsync) +{ + DBGINFO(("%s get_xsync=%x\n", info->device_name, info->xsync)); + if (put_user(info->xsync, xsync)) + return -EFAULT; + return 0; +} + +/* + * set extended sync pattern (1 to 4 bytes) for extended sync mode + * + * sync pattern is contained in least significant bytes of value + * most significant byte of sync pattern is oldest (1st sent/detected) + */ +static int set_xsync(struct slgt_info *info, int xsync) +{ + unsigned long flags; + + DBGINFO(("%s set_xsync=%x)\n", info->device_name, xsync)); + spin_lock_irqsave(&info->lock, flags); + info->xsync = xsync; + wr_reg32(info, XSR, xsync); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + +static int get_xctrl(struct slgt_info *info, int __user *xctrl) +{ + DBGINFO(("%s get_xctrl=%x\n", info->device_name, 
info->xctrl)); + if (put_user(info->xctrl, xctrl)) + return -EFAULT; + return 0; +} + +/* + * set extended control options + * + * xctrl[31:19] reserved, must be zero + * xctrl[18:17] extended sync pattern length in bytes + * 00 = 1 byte in xsr[7:0] + * 01 = 2 bytes in xsr[15:0] + * 10 = 3 bytes in xsr[23:0] + * 11 = 4 bytes in xsr[31:0] + * xctrl[16] 1 = enable terminal count, 0=disabled + * xctrl[15:0] receive terminal count for fixed length packets + * value is count minus one (0 = 1 byte packet) + * when terminal count is reached, receiver + * automatically returns to hunt mode and receive + * FIFO contents are flushed to DMA buffers with + * end of frame (EOF) status + */ +static int set_xctrl(struct slgt_info *info, int xctrl) +{ + unsigned long flags; + + DBGINFO(("%s set_xctrl=%x)\n", info->device_name, xctrl)); + spin_lock_irqsave(&info->lock, flags); + info->xctrl = xctrl; + wr_reg32(info, XCR, xctrl); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + /* * set general purpose IO pin state and direction * @@ -3768,7 +3852,9 @@ module_exit(slgt_exit); #define CALC_REGADDR() \ unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \ if (addr >= 0x80) \ - reg_addr += (info->port_num) * 32; + reg_addr += (info->port_num) * 32; \ + else if (addr >= 0x40) \ + reg_addr += (info->port_num) * 16; static __u8 rd_reg8(struct slgt_info *info, unsigned int addr) { @@ -4187,7 +4273,13 @@ static void sync_mode(struct slgt_info *info) /* TCR (tx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4202,6 +4294,9 @@ static void sync_mode(struct slgt_info *info) val = BIT2; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4256,7 +4351,13 @@ static void sync_mode(struct slgt_info *info) /* RCR (rx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4268,6 +4369,9 @@ static void sync_mode(struct slgt_info *info) val = 0; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4684,6 +4788,7 @@ static bool rx_get_buf(struct slgt_info *info) switch(info->params.mode) { case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: /* ignore residue in byte synchronous modes */ if (desc_residue(info->rbufs[i])) count--; diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 0ff2779c44d0..2e7d81c4e5ad 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -126,6 +126,7 @@ #define MGSL_MODE_BISYNC 4 #define MGSL_MODE_RAW 6 #define MGSL_MODE_BASE_CLOCK 7 +#define MGSL_MODE_XSYNC 8 #define MGSL_BUS_TYPE_ISA 1 #define MGSL_BUS_TYPE_EISA 2 @@ -290,6 +291,10 @@ struct gpio_desc { #define MGSL_IOCSGPIO _IOW(MGSL_MAGIC_IOC,16,struct gpio_desc) #define MGSL_IOCGGPIO 
_IOR(MGSL_MAGIC_IOC,17,struct gpio_desc) #define MGSL_IOCWAITGPIO _IOWR(MGSL_MAGIC_IOC,18,struct gpio_desc) +#define MGSL_IOCSXSYNC _IO(MGSL_MAGIC_IOC, 19) +#define MGSL_IOCGXSYNC _IO(MGSL_MAGIC_IOC, 20) +#define MGSL_IOCSXCTRL _IO(MGSL_MAGIC_IOC, 21) +#define MGSL_IOCGXCTRL _IO(MGSL_MAGIC_IOC, 22) #ifdef __KERNEL__ /* provide 32 bit ioctl compatibility on 64 bit systems */ -- cgit v1.2.3 From 2c70f022e2e1b1493e157dbc3796b1f70a3ff162 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:26 -0700 Subject: rapidio: fix RapidIO sysfs hierarchy This set of RapidIO patches extends support for standard error recovery mechanism and adds new IDT Gen2 sRIO switch devices - CPS-1848 and CPS-1616. Implementation of the standard error-stopped state recovery mechanism (as defined by the RapidIO specification) is required for the new switches. Version 2 of this set of patches addresses received comments and fixes an error notification setup issue found in the idt_gen2.c after the first version was released. This patch: Make RapidIO devices appear in /sys/devices/rapidio directory instead of top of /sys/devices directory. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-driver.c | 2 +- drivers/rapidio/rio-scan.c | 1 + include/linux/rio.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 3222fa3c808c..0f4a53bdaa3c 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -192,7 +192,7 @@ static int rio_match_bus(struct device *dev, struct device_driver *drv) out:return 0; } -static struct device rio_bus = { +struct device rio_bus = { .init_name = "rapidio", }; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 8070e074c739..1123be8f4b18 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -478,6 +478,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, } rdev->dev.bus = &rio_bus_type; + rdev->dev.parent = &rio_bus; device_initialize(&rdev->dev); rdev->dev.release = rio_release_dev; diff --git a/include/linux/rio.h b/include/linux/rio.h index bd6eb0ed34a7..84c9f8c5fb23 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -67,6 +67,7 @@ #define RIO_PW_MSG_SIZE 64 extern struct bus_type rio_bus_type; +extern struct device rio_bus; extern struct list_head rio_devices; /* list of all devices */ struct rio_mport; -- cgit v1.2.3 From ae05cbd5adef897d405ce8f90484c1239f79e086 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: rapidio: use stored ingress port number instead of register read The switch port information is obtained and stored during RIO device setup. Therefore repeated reads from Switch Port Information CAR may be removed. 
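A small sketch of what the cached lookup amounts to (user-space illustration; the masks and macros mirror the RIO_SWP_INFO_* definitions in rio_regs.h touched below, while the example register value is invented):

	#include <stdio.h>

	#define RIO_SWP_INFO_PORT_TOTAL_MASK	0x0000ff00
	#define RIO_SWP_INFO_PORT_NUM_MASK	0x000000ff
	#define RIO_GET_TOTAL_PORTS(x)	(((x) & RIO_SWP_INFO_PORT_TOTAL_MASK) >> 8)
	#define RIO_GET_PORT_NUM(x)	((x) & RIO_SWP_INFO_PORT_NUM_MASK)

	int main(void)
	{
		/*
		 * swpinfo is read once from the Switch Port Information CAR
		 * at device setup and cached in struct rio_dev; afterwards
		 * the ingress port comes from a bit test instead of a
		 * maintenance transaction on the RapidIO fabric.
		 */
		unsigned int swpinfo = 0x00001203;	/* invented: 18 ports, ingress port 3 */

		printf("total ports: %u, ingress port: %u\n",
		       RIO_GET_TOTAL_PORTS(swpinfo), RIO_GET_PORT_NUM(swpinfo));
		return 0;
	}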
Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 36 +++++++++--------------------------- include/linux/rio.h | 4 +++- include/linux/rio_regs.h | 2 ++ 3 files changed, 14 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 1123be8f4b18..d09c359844f3 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -420,6 +420,11 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, hopcount, RIO_EFB_ERR_MGMNT); } + if (rdev->pef & (RIO_PEF_SWITCH | RIO_PEF_MULTIPORT)) { + rio_mport_read_config_32(port, destid, hopcount, + RIO_SWP_INFO_CAR, &rdev->swpinfo); + } + rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR, &rdev->src_ops); rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, @@ -439,8 +444,6 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rio_mport_read_config_32(port, destid, hopcount, - RIO_SWP_INFO_CAR, &rdev->swpinfo); rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); if (!rswitch) goto cleanup; @@ -458,6 +461,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; + rswitch->rdev = rdev; dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, rdev->rswitch->switchid); rio_switch_init(rdev, do_enum); @@ -718,25 +722,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_inport- Gets the ingress port number - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns port number being used to access the switch device. 
- */ -static u8 -rio_get_swpinfo_inport(struct rio_mport *mport, u16 destid, u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return (u8) (result & 0xff); -} - /** * rio_get_swpinfo_tports- Gets total number of ports on the switch * @mport: Master port to send transaction @@ -834,8 +819,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rio_is_switch(rdev)) { next_switchid++; - sw_inport = rio_get_swpinfo_inport(port, - RIO_ANY_DESTID(port->sys_size), hopcount); + sw_inport = RIO_GET_PORT_NUM(rdev->swpinfo); rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, port->host_deviceid, sw_inport, 0); rdev->rswitch->route_table[port->host_deviceid] = sw_inport; @@ -989,8 +973,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", rio_name(rdev), rdev->vid, rdev->did, num_ports); for (port_num = 0; port_num < num_ports; port_num++) { - if (rio_get_swpinfo_inport(port, destid, hopcount) == - port_num) + if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; if (rio_sport_is_active @@ -1109,8 +1092,7 @@ static void rio_update_route_tables(struct rio_mport *port) if (rswitch->destid == destid) continue; - sport = rio_get_swpinfo_inport(port, - rswitch->destid, rswitch->hopcount); + sport = RIO_GET_PORT_NUM(rswitch->rdev->swpinfo); if (rswitch->add_entry) { rio_route_add_entry(port, rswitch, diff --git a/include/linux/rio.h b/include/linux/rio.h index 84c9f8c5fb23..ffdfe5ad43bf 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -112,7 +112,7 @@ struct rio_dev { u16 asm_rev; u16 efptr; u32 pef; - u32 swpinfo; /* Only used for switches */ + u32 swpinfo; u32 src_ops; u32 dst_ops; u32 comp_tag; @@ -219,6 +219,7 @@ struct rio_net { /** * struct rio_switch - RIO switch info * @node: Node in global list of switches + * @rdev: Associated RIO device structure * @switchid: Switch ID that is unique across a network * @hopcount: Hopcount to this switch * @destid: Associated destid in the path @@ -234,6 +235,7 @@ struct rio_net { */ struct rio_switch { struct list_head node; + struct rio_dev *rdev; u16 switchid; u16 hopcount; u16 destid; diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index aedee0489fb4..be80b1b21815 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -33,6 +33,7 @@ #define RIO_PEF_MEMORY 0x40000000 /* [I] MMIO */ #define RIO_PEF_PROCESSOR 0x20000000 /* [I] Processor */ #define RIO_PEF_SWITCH 0x10000000 /* [I] Switch */ +#define RIO_PEF_MULTIPORT 0x08000000 /* [VI, 2.1] Multiport */ #define RIO_PEF_INB_MBOX 0x00f00000 /* [II] Mailboxes */ #define RIO_PEF_INB_MBOX0 0x00800000 /* [II] Mailbox 0 */ #define RIO_PEF_INB_MBOX1 0x00400000 /* [II] Mailbox 1 */ @@ -51,6 +52,7 @@ #define RIO_SWP_INFO_PORT_TOTAL_MASK 0x0000ff00 /* [I] Total number of ports */ #define RIO_SWP_INFO_PORT_NUM_MASK 0x000000ff /* [I] Maintenance transaction port number */ #define RIO_GET_TOTAL_PORTS(x) ((x & RIO_SWP_INFO_PORT_TOTAL_MASK) >> 8) +#define RIO_GET_PORT_NUM(x) (x & RIO_SWP_INFO_PORT_NUM_MASK) #define RIO_SRC_OPS_CAR 0x18 /* [I] Source Operations CAR */ #define RIO_SRC_OPS_READ 0x00008000 /* [I] Read op */ -- cgit v1.2.3 From 68fe4df5d21294401959fa61d5a7094705ed8f6f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: rapidio: add relation links between RIO device structures Create back and forward links between RIO devices. 
These links are intended for use by error management and hot-plug extensions. Links for redundant RIO connections between switches are not set (will be fixed in a separate patch). Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 56 +++++++++++++++++++--------------------------- include/linux/rio.h | 4 ++++ 2 files changed, 27 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index d09c359844f3..d2ea01872d6a 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -444,7 +444,10 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); + rswitch = kzalloc(sizeof(*rswitch) + + RIO_GET_TOTAL_PORTS(rdev->swpinfo) * + sizeof(rswitch->nextdev[0]), + GFP_KERNEL); if (!rswitch) goto cleanup; rswitch->switchid = next_switchid; @@ -722,25 +725,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_tports- Gets total number of ports on the switch - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns total numbers of ports implemented by the switch device. - */ -static u8 rio_get_swpinfo_tports(struct rio_mport *mport, u16 destid, - u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return RIO_GET_TOTAL_PORTS(result); -} - /** * rio_net_add_mport- Add a master port to a RIO network * @net: RIO network @@ -761,15 +745,16 @@ static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) * @net: RIO network being enumerated * @port: Master port to send transactions * @hopcount: Number of hops into the network + * @prev: Previous RIO device connected to the enumerated one + * @prev_port: Port on previous RIO device * * Recursively enumerates a RIO network. Transactions are sent via the * master port passed in @port. */ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, - u8 hopcount) + u8 hopcount, struct rio_dev *prev, int prev_port) { int port_num; - int num_ports; int cur_destid; int sw_destid; int sw_inport; @@ -814,6 +799,9 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rdev) { /* Add device to the global and bus/net specific list. 
*/ list_add_tail(&rdev->net_list, &net->devices); + rdev->prev = prev; + if (prev && rio_is_switch(prev)) + prev->rswitch->nextdev[prev_port] = rdev; } else return -1; @@ -832,14 +820,14 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, rdev->rswitch->route_table[destid] = sw_inport; } - num_ports = - rio_get_swpinfo_tports(port, RIO_ANY_DESTID(port->sys_size), - hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); sw_destid = next_destid; - for (port_num = 0; port_num < num_ports; port_num++) { + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { /*Enable Input Output Port (transmitter reviever)*/ rio_enable_rx_tx_port(port, 0, RIO_ANY_DESTID(port->sys_size), @@ -864,7 +852,8 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, RIO_ANY_DESTID(port->sys_size), port_num, 0); - if (rio_enum_peer(net, port, hopcount + 1) < 0) + if (rio_enum_peer(net, port, hopcount + 1, + rdev, port_num) < 0) return -1; /* Update routing tables */ @@ -951,7 +940,6 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, u8 hopcount) { u8 port_num, route_port; - int num_ports; struct rio_dev *rdev; u16 ndestid; @@ -968,11 +956,13 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, /* Associated destid is how we accessed this switch */ rdev->rswitch->destid = destid; - num_ports = rio_get_swpinfo_tports(port, destid, hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); - for (port_num = 0; port_num < num_ports; port_num++) { + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; @@ -1167,7 +1157,7 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter reviever) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); - if (rio_enum_peer(net, mport, 0) < 0) { + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail. 
*/ printk(KERN_INFO "RIO: master port %d device has lost enumeration to a remote host\n", diff --git a/include/linux/rio.h b/include/linux/rio.h index ffdfe5ad43bf..8d9e66dc7969 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -99,6 +99,7 @@ union rio_pw_msg; * @riores: RIO resources this device owns * @pwcback: port-write callback function for this device * @destid: Network destination ID + * @prev: Previous RIO device connected to the current one */ struct rio_dev { struct list_head global_list; /* node in list of all RIO devices */ @@ -125,6 +126,7 @@ struct rio_dev { struct resource riores[RIO_MAX_DEV_RESOURCES]; int (*pwcback) (struct rio_dev *rdev, union rio_pw_msg *msg, int step); u16 destid; + struct rio_dev *prev; }; #define rio_dev_g(n) list_entry(n, struct rio_dev, global_list) @@ -232,6 +234,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { struct list_head node; @@ -253,6 +256,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + struct rio_dev *nextdev[0]; }; /* Low-level architecture-dependent routines */ -- cgit v1.2.3 From dd5648c9f53b5cbd9f948d752624400545f979fb Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:30 -0700 Subject: rapidio: add default handler for error-stopped state The default error-stopped state handler provides recovery mechanism as defined by RIO specification. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 218 ++++++++++++++++++++++++++++++++------ drivers/rapidio/switches/idtcps.c | 10 ++ drivers/rapidio/switches/tsi57x.c | 4 + include/linux/rio_regs.h | 8 +- 4 files changed, 206 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 74e9d22d95fb..77bd4165238f 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -494,6 +494,148 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) return 0; } +/** + * rio_get_input_status - Sends a Link-Request/Input-Status control symbol and + * returns link-response (if requested). 
+ * @rdev: RIO devive to issue Input-status command + * @pnum: Device port number to issue the command + * @lnkresp: Response from a link partner + */ +static int +rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int checkcount; + + if (lnkresp) { + /* Read from link maintenance response register + * to clear valid bit */ + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + udelay(50); + } + + /* Issue Input-status command */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(pnum), + RIO_MNT_REQ_CMD_IS); + + /* Exit if the response is not expected */ + if (lnkresp == NULL) + return 0; + + checkcount = 3; + while (checkcount--) { + udelay(50); + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + if (regval & RIO_PORT_N_MNT_RSP_RVAL) { + *lnkresp = regval; + return 0; + } + } + + return -EIO; +} + +/** + * rio_clr_err_stopped - Clears port Error-stopped states. + * @rdev: Pointer to RIO device control structure + * @pnum: Switch port number to clear errors + * @err_status: port error status (if 0 reads register from device) + */ +static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + struct rio_dev *nextdev = rdev->rswitch->nextdev[pnum]; + u32 regval; + u32 far_ackid, far_linkstat, near_ackid; + + if (err_status == 0) + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + + if (err_status & RIO_PORT_N_ERR_STS_PW_OUT_ES) { + pr_debug("RIO_EM: servicing Output Error-Stopped state\n"); + /* + * Send a Link-Request/Input-Status control symbol + */ + if (rio_get_input_status(rdev, pnum, ®val)) { + pr_debug("RIO_EM: Input-status response timeout\n"); + goto rd_err; + } + + pr_debug("RIO_EM: SP%d Input-status response=0x%08x\n", + pnum, regval); + far_ackid = (regval & RIO_PORT_N_MNT_RSP_ASTAT) >> 5; + far_linkstat = regval & RIO_PORT_N_MNT_RSP_LSTAT; + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + ®val); + pr_debug("RIO_EM: SP%d_ACK_STS_CSR=0x%08x\n", pnum, regval); + near_ackid = (regval & RIO_PORT_N_ACK_INBOUND) >> 24; + pr_debug("RIO_EM: SP%d far_ackID=0x%02x far_linkstat=0x%02x" \ + " near_ackID=0x%02x\n", + pnum, far_ackid, far_linkstat, near_ackid); + + /* + * If required, synchronize ackIDs of near and + * far sides. + */ + if ((far_ackid != ((regval & RIO_PORT_N_ACK_OUTSTAND) >> 8)) || + (far_ackid != (regval & RIO_PORT_N_ACK_OUTBOUND))) { + /* Align near outstanding/outbound ackIDs with + * far inbound. + */ + rio_mport_write_config_32(mport, destid, + hopcount, rdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(pnum), + (near_ackid << 24) | + (far_ackid << 8) | far_ackid); + /* Align far outstanding/outbound ackIDs with + * near inbound. 
+ */ + far_ackid++; + if (nextdev) + rio_write_config_32(nextdev, + nextdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(RIO_GET_PORT_NUM(nextdev->swpinfo)), + (far_ackid << 24) | + (near_ackid << 8) | near_ackid); + else + pr_debug("RIO_EM: Invalid nextdev pointer (NULL)\n"); + } +rd_err: + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + if ((err_status & RIO_PORT_N_ERR_STS_PW_INP_ES) && nextdev) { + pr_debug("RIO_EM: servicing Input Error-Stopped state\n"); + rio_get_input_status(nextdev, + RIO_GET_PORT_NUM(nextdev->swpinfo), NULL); + udelay(50); + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + return (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) ? 1 : 0; +} + /** * rio_inb_pwrite_handler - process inbound port-write message * @pw_msg: pointer to inbound port-write message @@ -507,7 +649,7 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) struct rio_mport *mport; u8 hopcount; u16 destid; - u32 err_status; + u32 err_status, em_perrdet, em_ltlerrdet; int rc, portnum; rdev = rio_get_comptag(pw_msg->em.comptag, NULL); @@ -524,12 +666,11 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) { u32 i; for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) { - pr_debug("0x%02x: %08x %08x %08x %08x", + pr_debug("0x%02x: %08x %08x %08x %08x\n", i*4, pw_msg->raw[i], pw_msg->raw[i + 1], pw_msg->raw[i + 2], pw_msg->raw[i + 3]); i += 4; } - pr_debug("\n"); } #endif @@ -573,29 +714,28 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) &err_status); pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status); - if (pw_msg->em.errdetect) { - pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", - portnum, pw_msg->em.errdetect); - /* Clear EM Port N Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); - } + if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - if (pw_msg->em.ltlerrdet) { - pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", - pw_msg->em.ltlerrdet); - /* Clear EM L/T Layer Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); - } + if (!(rdev->rswitch->port_ok & (1 << portnum))) { + rdev->rswitch->port_ok |= (1 << portnum); + rio_set_port_lockout(rdev, portnum, 0); + /* Schedule Insertion Service */ + pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", + rio_name(rdev), portnum); + } - /* Clear Port Errors */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear error-stopped states (if reported). + * Depending on the link partner state, two attempts + * may be needed for successful recovery. 
+ */ + if (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) { + if (rio_clr_err_stopped(rdev, portnum, err_status)) + rio_clr_err_stopped(rdev, portnum, 0); + } + } else { /* if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) */ - if (rdev->rswitch->port_ok & (1 << portnum)) { - if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) { + if (rdev->rswitch->port_ok & (1 << portnum)) { rdev->rswitch->port_ok &= ~(1 << portnum); rio_set_port_lockout(rdev, portnum, 1); @@ -608,17 +748,33 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n", rio_name(rdev), portnum); } - } else { - if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - rdev->rswitch->port_ok |= (1 << portnum); - rio_set_port_lockout(rdev, portnum, 0); + } - /* Schedule Insertion Service */ - pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", - rio_name(rdev), portnum); - } + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", + portnum, em_perrdet); + /* Clear EM Port N Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); } + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", + em_ltlerrdet); + /* Clear EM L/T Layer Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); + } + + /* Clear remaining error bits */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), + err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index 2c790c144f89..fc9f6374f759 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -117,6 +117,10 @@ idtcps_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) { + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); rdev->rswitch->add_entry = idtcps_route_add_entry; rdev->rswitch->get_entry = idtcps_route_get_entry; @@ -126,6 +130,12 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) rdev->rswitch->em_init = NULL; rdev->rswitch->em_handle = NULL; + if (do_enum) { + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + } + return 0; } diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index d34df722d95f..d9e94920e8b0 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -205,6 +205,10 @@ tsi57x_em_init(struct rio_dev *rdev) portnum++; } + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x9a << 8); + return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index be80b1b21815..daa269d18e07 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -224,15 +224,17 @@ #define 
RIO_PORT_GEN_MASTER 0x40000000 #define RIO_PORT_GEN_DISCOVERED 0x20000000 #define RIO_PORT_N_MNT_REQ_CSR(x) (0x0040 + x*0x20) /* 0x0002 */ +#define RIO_MNT_REQ_CMD_RD 0x03 /* Reset-device command */ +#define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ #define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 -#define RIO_PORT_N_ACK_INBOUND 0x1f000000 -#define RIO_PORT_N_ACK_OUTSTAND 0x00001f00 -#define RIO_PORT_N_ACK_OUTBOUND 0x0000001f +#define RIO_PORT_N_ACK_INBOUND 0x3f000000 +#define RIO_PORT_N_ACK_OUTSTAND 0x00003f00 +#define RIO_PORT_N_ACK_OUTBOUND 0x0000003f #define RIO_PORT_N_ERR_STS_CSR(x) (0x0058 + x*0x20) #define RIO_PORT_N_ERR_STS_PW_OUT_ES 0x00010000 /* Output Error-stopped */ #define RIO_PORT_N_ERR_STS_PW_INP_ES 0x00000100 /* Input Error-stopped */ -- cgit v1.2.3 From ac38d7232dfa3c71b129bab3318ba327bbcf8405 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:31 -0700 Subject: rapidio: modify sysfs initialization for switches 1. Change to create attribute "routes" only for switches. 2. Add a switch-specific callback to create/remove proprietary attributes. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-sysfs.c | 26 +++++++++++++++++++------- include/linux/rio.h | 6 ++++++ 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 00b475658356..137ed93ee33f 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -40,9 +40,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch char *str = buf; int i; - if (!rdev->rswitch) - goto out; - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); i++) { if (rdev->rswitch->route_table[i] == RIO_INVALID_ROUTE) @@ -52,7 +49,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch rdev->rswitch->route_table[i]); } - out: return (str - buf); } @@ -63,10 +59,11 @@ struct device_attribute rio_dev_attrs[] = { __ATTR_RO(asm_did), __ATTR_RO(asm_vid), __ATTR_RO(asm_rev), - __ATTR_RO(routes), __ATTR_NULL, }; +static DEVICE_ATTR(routes, S_IRUGO, routes_show, NULL); + static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -218,7 +215,17 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) { int err = 0; - err = sysfs_create_bin_file(&rdev->dev.kobj, &rio_config_attr); + err = device_create_bin_file(&rdev->dev, &rio_config_attr); + + if (!err && rdev->rswitch) { + err = device_create_file(&rdev->dev, &dev_attr_routes); + if (!err && rdev->rswitch->sw_sysfs) + err = rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_CREATE); + } + + if (err) + pr_warning("RIO: Failed to create attribute file(s) for %s\n", + rio_name(rdev)); return err; } @@ -231,5 +238,10 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) */ void rio_remove_sysfs_dev_files(struct rio_dev *rdev) { - sysfs_remove_bin_file(&rdev->dev.kobj, &rio_config_attr); + device_remove_bin_file(&rdev->dev, &rio_config_attr); + if (rdev->rswitch) { + device_remove_file(&rdev->dev, 
&dev_attr_routes); + if (rdev->rswitch->sw_sysfs) + rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_REMOVE); + } } diff --git a/include/linux/rio.h b/include/linux/rio.h index 8d9e66dc7969..4fa5e3d2b117 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -218,6 +218,10 @@ struct rio_net { unsigned char id; /* RIO network ID */ }; +/* Definitions used by switch sysfs initialization callback */ +#define RIO_SW_SYSFS_CREATE 1 /* Create switch attributes */ +#define RIO_SW_SYSFS_REMOVE 0 /* Remove switch attributes */ + /** * struct rio_switch - RIO switch info * @node: Node in global list of switches @@ -234,6 +238,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @sw_sysfs: Callback that initializes switch-specific sysfs attributes * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { @@ -256,6 +261,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + int (*sw_sysfs) (struct rio_dev *dev, int create); struct rio_dev *nextdev[0]; }; -- cgit v1.2.3 From a3725c45c114bd06e091802f90533332d1e93819 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:33 -0700 Subject: rapidio: add support for IDT CPS Gen2 switches Add the RIO switch driver and definitions for IDT CPS-1848 and CPS-1616 Gen2 devices. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/Kconfig | 7 + drivers/rapidio/switches/Makefile | 1 + drivers/rapidio/switches/idt_gen2.c | 447 ++++++++++++++++++++++++++++++++++++ include/linux/rio_ids.h | 2 + include/linux/rio_regs.h | 5 + 5 files changed, 462 insertions(+) create mode 100644 drivers/rapidio/switches/idt_gen2.c (limited to 'include') diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 2b4e9b2b6631..f47fee5d4563 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -20,6 +20,13 @@ config RAPIDIO_TSI568 ---help--- Includes support for IDT Tsi568 serial RapidIO switch. +config RAPIDIO_CPS_GEN2 + bool "IDT CPS Gen.2 SRIO switch support" + depends on RAPIDIO + default n + ---help--- + Includes support for ITD CPS Gen.2 serial RapidIO switches. + config RAPIDIO_TSI500 bool "Tsi500 Parallel RapidIO switch support" depends on RAPIDIO diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index fe4adc3e8d5f..48d67a6b98c8 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o obj-$(CONFIG_RAPIDIO_TSI500) += tsi500.o +obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o ifeq ($(CONFIG_RAPIDIO_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c new file mode 100644 index 000000000000..0bb871cb5c40 --- /dev/null +++ b/drivers/rapidio/switches/idt_gen2.c @@ -0,0 +1,447 @@ +/* + * IDT CPS Gen.2 Serial RapidIO switch family support + * + * Copyright 2010 Integrated Device Technology, Inc. 
+ * Alexandre Bounine + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include "../rio.h" + +#define LOCAL_RTE_CONF_DESTID_SEL 0x010070 +#define LOCAL_RTE_CONF_DESTID_SEL_PSEL 0x0000001f + +#define IDT_LT_ERR_REPORT_EN 0x03100c + +#define IDT_PORT_ERR_REPORT_EN(n) (0x031044 + (n)*0x40) +#define IDT_PORT_ERR_REPORT_EN_BC 0x03ff04 + +#define IDT_PORT_ISERR_REPORT_EN(n) (0x03104C + (n)*0x40) +#define IDT_PORT_ISERR_REPORT_EN_BC 0x03ff0c +#define IDT_PORT_INIT_TX_ACQUIRED 0x00000020 + +#define IDT_LANE_ERR_REPORT_EN(n) (0x038010 + (n)*0x100) +#define IDT_LANE_ERR_REPORT_EN_BC 0x03ff10 + +#define IDT_DEV_CTRL_1 0xf2000c +#define IDT_DEV_CTRL_1_GENPW 0x02000000 +#define IDT_DEV_CTRL_1_PRSTBEH 0x00000001 + +#define IDT_CFGBLK_ERR_CAPTURE_EN 0x020008 +#define IDT_CFGBLK_ERR_REPORT 0xf20014 +#define IDT_CFGBLK_ERR_REPORT_GENPW 0x00000002 + +#define IDT_AUX_PORT_ERR_CAP_EN 0x020000 +#define IDT_AUX_ERR_REPORT_EN 0xf20018 +#define IDT_AUX_PORT_ERR_LOG_I2C 0x00000002 +#define IDT_AUX_PORT_ERR_LOG_JTAG 0x00000001 + +#define IDT_ISLTL_ADDRESS_CAP 0x021014 + +#define IDT_RIO_DOMAIN 0xf20020 +#define IDT_RIO_DOMAIN_MASK 0x000000ff + +#define IDT_PW_INFO_CSR 0xf20024 + +#define IDT_SOFT_RESET 0xf20040 +#define IDT_SOFT_RESET_REQ 0x00030097 + +#define IDT_I2C_MCTRL 0xf20050 +#define IDT_I2C_MCTRL_GENPW 0x04000000 + +#define IDT_JTAG_CTRL 0xf2005c +#define IDT_JTAG_CTRL_GENPW 0x00000002 + +#define IDT_LANE_CTRL(n) (0xff8000 + (n)*0x100) +#define IDT_LANE_CTRL_BC 0xffff00 +#define IDT_LANE_CTRL_GENPW 0x00200000 +#define IDT_LANE_DFE_1_BC 0xffff18 +#define IDT_LANE_DFE_2_BC 0xffff1c + +#define IDT_PORT_OPS(n) (0xf40004 + (n)*0x100) +#define IDT_PORT_OPS_GENPW 0x08000000 +#define IDT_PORT_OPS_PL_ELOG 0x00000040 +#define IDT_PORT_OPS_LL_ELOG 0x00000020 +#define IDT_PORT_OPS_LT_ELOG 0x00000010 +#define IDT_PORT_OPS_BC 0xf4ff04 + +#define IDT_PORT_ISERR_DET(n) (0xf40008 + (n)*0x100) + +#define IDT_ERR_CAP 0xfd0000 +#define IDT_ERR_CAP_LOG_OVERWR 0x00000004 + +#define IDT_ERR_RD 0xfd0004 + +#define IDT_DEFAULT_ROUTE 0xde +#define IDT_NO_ROUTE 0xdf + +static int +idtg2_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) +{ + /* + * Select routing table to update + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + /* + * Program destination port for the specified destID + */ + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + (u32)route_destid); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (u32)route_port); + udelay(10); + + return 0; +} + +static int +idtg2_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) +{ + u32 result; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + route_destid); + + rio_mport_read_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, &result); + + if (IDT_DEFAULT_ROUTE == 
(u8)result || IDT_NO_ROUTE == (u8)result) + *route_port = RIO_INVALID_ROUTE; + else + *route_port = (u8)result; + + return 0; +} + +static int +idtg2_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) +{ + u32 i; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + for (i = RIO_STD_RTE_CONF_EXTCFGEN; + i <= (RIO_STD_RTE_CONF_EXTCFGEN | 0xff);) { + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, i); + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (IDT_DEFAULT_ROUTE << 24) | (IDT_DEFAULT_ROUTE << 16) | + (IDT_DEFAULT_ROUTE << 8) | IDT_DEFAULT_ROUTE); + i += 4; + } + + return 0; +} + + +static int +idtg2_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 sw_domain) +{ + /* + * Switch domain configuration operates only at global level + */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, (u32)sw_domain); + return 0; +} + +static int +idtg2_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 *sw_domain) +{ + u32 regval; + + /* + * Switch domain configuration operates only at global level + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, ®val); + + *sw_domain = (u8)(regval & 0xff); + + return 0; +} + +static int +idtg2_em_init(struct rio_dev *rdev) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int i, tmp; + + /* + * This routine performs device-specific initialization only. + * All standard EM configuration should be performed at upper level. + */ + + pr_debug("RIO: %s [%d:%d]\n", __func__, destid, hopcount); + + /* Set Port-Write info CSR: PRIO=3 and CRF=1 */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PW_INFO_CSR, 0x0000e000); + + /* + * Configure LT LAYER error reporting. + */ + + /* Enable standard (RIO.p8) error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LT_ERR_REPORT_EN, + REM_LTL_ERR_ILLTRAN | REM_LTL_ERR_UNSOLR | + REM_LTL_ERR_UNSUPTR); + + /* Use Port-Writes for LT layer error reporting. + * Enable per-port reset + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, + regval | IDT_DEV_CTRL_1_GENPW | IDT_DEV_CTRL_1_PRSTBEH); + + /* + * Configure PORT error reporting. + */ + + /* Report all RIO.p8 errors supported by device */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ERR_REPORT_EN_BC, 0x807e8037); + + /* Configure reporting of implementation specific errors/events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_REPORT_EN_BC, IDT_PORT_INIT_TX_ACQUIRED); + + /* Use Port-Writes for port error reporting and enable error logging */ + tmp = RIO_GET_TOTAL_PORTS(rdev->swpinfo); + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), regval | IDT_PORT_OPS_GENPW | + IDT_PORT_OPS_PL_ELOG | + IDT_PORT_OPS_LL_ELOG | + IDT_PORT_OPS_LT_ELOG); + } + /* Overwrite error log if full */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ERR_CAP, IDT_ERR_CAP_LOG_OVERWR); + + /* + * Configure LANE error reporting. 
+ */ + + /* Disable line error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_ERR_REPORT_EN_BC, 0); + + /* Use Port-Writes for lane error reporting (when enabled) + * (do per-lane update because lanes may have different configuration) + */ + tmp = (rdev->did == RIO_DID_IDTCPS1848) ? 48 : 16; + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), regval | IDT_LANE_CTRL_GENPW); + } + + /* + * Configure AUX error reporting. + */ + + /* Disable JTAG and I2C Error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_PORT_ERR_CAP_EN, 0); + + /* Disable JTAG and I2C Error reporting/logging */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_ERR_REPORT_EN, 0); + + /* Disable Port-Write notification from JTAG */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_JTAG_CTRL, 0); + + /* Disable Port-Write notification from I2C */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, + regval & ~IDT_I2C_MCTRL_GENPW); + + /* + * Configure CFG_BLK error reporting. + */ + + /* Disable Configuration Block error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_CAPTURE_EN, 0); + + /* Disable Port-Writes for Configuration Block error reporting */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, + regval & ~IDT_CFGBLK_ERR_REPORT_GENPW); + + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + + return 0; +} + +static int +idtg2_em_handler(struct rio_dev *rdev, u8 portnum) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval, em_perrdet, em_ltlerrdet; + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + /* Service Logical/Transport Layer Error(s) */ + if (em_ltlerrdet & REM_LTL_ERR_IMPSPEC) { + /* Implementation specific error reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, ®val); + + pr_debug("RIO: %s Implementation Specific LTL errors" \ + " 0x%x @(0x%x)\n", + rio_name(rdev), em_ltlerrdet, regval); + + /* Clear implementation specific address capture CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, 0); + + } + } + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + /* Service Port-Level Error(s) */ + if (em_perrdet & REM_PED_IMPL_SPEC) { + /* Implementation Specific port error reported */ + + /* Get IS errors reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), ®val); + + pr_debug("RIO: %s Implementation Specific Port" \ + " errors 0x%x\n", rio_name(rdev), regval); + + /* Clear all implementation specific events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), 0); + } + } + + return 0; +} + +static ssize_t +idtg2_show_errlog(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct rio_mport *mport = rdev->net->hport; + u16 destid = 
rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + ssize_t len = 0; + u32 regval; + + while (!rio_mport_read_config_32(mport, destid, hopcount, + IDT_ERR_RD, &regval)) { + if (!regval) /* 0 = end of log */ + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%08x\n", regval); + if (len >= (PAGE_SIZE - 10)) + break; + } + + return len; +} + +static DEVICE_ATTR(errlog, S_IRUGO, idtg2_show_errlog, NULL); + +static int idtg2_sysfs(struct rio_dev *rdev, int create) +{ + struct device *dev = &rdev->dev; + int err = 0; + + if (create == RIO_SW_SYSFS_CREATE) { + /* Initialize sysfs entries */ + err = device_create_file(dev, &dev_attr_errlog); + if (err) + dev_err(dev, "Unable to create sysfs errlog file\n"); + } else + device_remove_file(dev, &dev_attr_errlog); + + return err; +} + +static int idtg2_switch_init(struct rio_dev *rdev, int do_enum) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + rdev->rswitch->add_entry = idtg2_route_add_entry; + rdev->rswitch->get_entry = idtg2_route_get_entry; + rdev->rswitch->clr_table = idtg2_route_clr_table; + rdev->rswitch->set_domain = idtg2_set_domain; + rdev->rswitch->get_domain = idtg2_get_domain; + rdev->rswitch->em_init = idtg2_em_init; + rdev->rswitch->em_handle = idtg2_em_handler; + rdev->rswitch->sw_sysfs = idtg2_sysfs; + + return 0; +} + +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index db50e1c288b7..ee7b6ada188f 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -34,5 +34,7 @@ #define RIO_DID_IDTCPS16 0x035b #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e +#define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1616 0x0379 #endif /* LINUX_RIO_IDS_H */ diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index daa269d18e07..a18b2e22aa1d 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -161,6 +161,7 @@ #define RIO_COMPONENT_TAG_CSR 0x6c /* [III] Component Tag CSR */ #define RIO_STD_RTE_CONF_DESTID_SEL_CSR 0x70 +#define RIO_STD_RTE_CONF_EXTCFGEN 0x80000000 #define RIO_STD_RTE_CONF_PORT_SEL_CSR 0x74 #define RIO_STD_RTE_DEFAULT_PORT 0x78 @@ -265,6 +266,10 @@ #define RIO_EM_EFB_HEADER 0x000 /* Error Management Extensions Block Header */ #define RIO_EM_LTL_ERR_DETECT 0x008 /* Logical/Transport Layer Error Detect CSR */ #define RIO_EM_LTL_ERR_EN 0x00c /* Logical/Transport Layer Error Enable CSR */ +#define REM_LTL_ERR_ILLTRAN 0x08000000 /* Illegal Transaction decode */ +#define REM_LTL_ERR_UNSOLR 0x00800000 /* Unsolicited Response */ +#define REM_LTL_ERR_UNSUPTR 0x00400000 /* Unsupported Transaction */ +#define REM_LTL_ERR_IMPSPEC 0x000000ff /* Implementation Specific */ #define RIO_EM_LTL_HIADDR_CAP 0x010 /* Logical/Transport Layer High Address Capture CSR */ #define RIO_EM_LTL_ADDR_CAP 0x014 /* Logical/Transport Layer Address Capture CSR */ #define RIO_EM_LTL_DEVID_CAP 0x018 /* Logical/Transport Layer Device ID Capture CSR */ -- cgit v1.2.3 From af84ca38aff94061dd0711edbb99b0900a9c28fd Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:34 -0700 Subject: rapidio: add handling of redundant routes Detects a RIO link to an already enumerated device and properly sets links between device objects. Changes to the enumeration/discovery logic: 1.
Use the Master Enable bit to signal the end of enumeration - agents may start their discovery process as soon as they see this bit set (the Component Tag register was used before for this purpose). 2. Enumerator sets Component Tag (!= 0) immediately during device setup. This makes it possible to identify the device if a redundant route exists in a RIO system. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 8 ++++ drivers/rapidio/rio-scan.c | 88 ++++++++++++++++++++++--------------------- drivers/rapidio/rio.c | 16 ++++---- drivers/rapidio/rio.h | 1 + include/linux/rio.h | 2 + 5 files changed, 64 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index ed2ec7154917..9725369d432a 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -50,6 +50,7 @@ #define RIO_ATMU_REGS_OFFSET 0x10c00 #define RIO_P_MSG_REGS_OFFSET 0x11000 #define RIO_S_MSG_REGS_OFFSET 0x13000 +#define RIO_GCCSR 0x13c #define RIO_ESCSR 0x158 #define RIO_CCSR 0x15c #define RIO_LTLEDCSR 0x0608 @@ -1471,6 +1472,7 @@ int fsl_rio_setup(struct platform_device *dev) port->host_deviceid = fsl_rio_get_hdid(port->id); port->priv = priv; + port->phys_efptr = 0x100; rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); @@ -1518,6 +1520,12 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (port->host_deviceid >= 0) + out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | + RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); + else + out_be32(priv->regs_win + RIO_GCCSR, 0x00000000); + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + RIO_ATMU_REGS_OFFSET); priv->maint_atmu_regs = priv->atmu_regs + 1; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index e3efdf93df5a..1eb82c4c712e 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -48,7 +48,7 @@ DEFINE_SPINLOCK(rio_global_list_lock); static int next_destid = 0; static int next_switchid = 0; static int next_net = 0; -static int next_comptag; +static int next_comptag = 1; static struct timer_list rio_enum_timer = TIMER_INITIALIZER(rio_enum_timeout, 0, 0); @@ -121,27 +121,6 @@ static int rio_clear_locks(struct rio_mport *port) u32 result; int ret = 0; - /* Assign component tag to all devices */ - next_comptag = 1; - rio_local_write_config_32(port, RIO_COMPONENT_TAG_CSR, next_comptag++); - - list_for_each_entry(rdev, &rio_devices, global_list) { - /* Mark device as discovered */ - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - &result); - rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - result | RIO_PORT_GEN_DISCOVERED); - - rio_write_config_32(rdev, RIO_COMPONENT_TAG_CSR, next_comptag); - rdev->comp_tag = next_comptag++; - if (next_comptag >= 0x10000) { - pr_err("RIO: Component Tag Counter Overflow\n"); - break; - } - } - /* Release host device id locks */ rio_local_write_config_32(port, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); @@ -162,6 +141,15 @@ rdev->vid, rdev->did); ret = -EINVAL; } + + /* Mark device as discovered and enable master */ + rio_read_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + &result); + result |= RIO_PORT_GEN_DISCOVERED |
RIO_PORT_GEN_MASTER; + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + result); } return ret; @@ -430,6 +418,17 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, &rdev->dst_ops); + if (do_enum) { + /* Assign component tag to device */ + if (next_comptag >= 0x10000) { + pr_err("RIO: Component Tag Counter Overflow\n"); + goto cleanup; + } + rio_mport_write_config_32(port, destid, hopcount, + RIO_COMPONENT_TAG_CSR, next_comptag); + rdev->comp_tag = next_comptag++; + } + if (rio_device_has_destid(port, rdev->src_ops, rdev->dst_ops)) { if (do_enum) { rio_set_device_id(port, destid, hopcount, next_destid); @@ -725,21 +724,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_net_add_mport- Add a master port to a RIO network - * @net: RIO network - * @port: Master port to add - * - * Adds a master port to the network list of associated master - * ports.. - */ -static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) -{ - spin_lock(&rio_global_list_lock); - list_add_tail(&port->nnode, &net->mports); - spin_unlock(&rio_global_list_lock); -} - /** * rio_enum_peer- Recursively enumerate a RIO network through a master port * @net: RIO network being enumerated @@ -760,6 +744,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, int sw_inport; struct rio_dev *rdev; u16 destid; + u32 regval; int tmp; if (rio_mport_chk_dev_access(port, @@ -772,9 +757,21 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, pr_debug("RIO: PE already discovered by this host\n"); /* * Already discovered by this host. Add it as another - * master port for the current network. + * link to the existing device. */ - rio_net_add_mport(net, port); + rio_mport_read_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_COMPONENT_TAG_CSR, &regval); + + if (regval) { + rdev = rio_get_comptag((regval & 0xffff), NULL); + + if (rdev && prev && rio_is_switch(prev)) { + pr_debug("RIO: redundant path to %s\n", + rio_name(rdev)); + prev->rswitch->nextdev[prev_port] = rdev; + } + } + return 0; } @@ -925,10 +922,11 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, */ static int rio_enum_complete(struct rio_mport *port) { - u32 tag_csr; + u32 regval; - rio_local_read_config_32(port, RIO_COMPONENT_TAG_CSR, &tag_csr); - return (tag_csr & 0xffff) ? 1 : 0; + rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR, + &regval); + return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0; } /** @@ -991,6 +989,8 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, break; } + if (ndestid == RIO_ANY_DESTID(port->sys_size)) + continue; rio_unlock_device(port, destid, hopcount); if (rio_disc_peer (net, port, ndestid, hopcount + 1) < 0) @@ -1163,6 +1163,10 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter receiver) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); + /* Set component tag for host */ + rio_local_write_config_32(mport, RIO_COMPONENT_TAG_CSR, + next_comptag++); + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail.
*/ printk(KERN_INFO diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index fa5e3cbe4c83..7f18a65c4ed0 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -443,7 +443,7 @@ rio_mport_get_physefb(struct rio_mport *port, int local, * @from is not %NULL, searches continue from next device on the global * list. */ -static struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) +struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) { struct list_head *n; struct rio_dev *rdev; @@ -507,7 +507,7 @@ static int rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) { u32 result; - int p_port, rc = -EIO; + int p_port, dstid, rc = -EIO; struct rio_dev *prev = NULL; /* Find switch with failed RIO link */ @@ -522,20 +522,18 @@ rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) if (prev == NULL) goto err_out; - /* Find port with failed RIO link */ - for (p_port = 0; - p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo); p_port++) - if (prev->rswitch->nextdev[p_port] == rdev) - break; + dstid = (rdev->pef & RIO_PEF_SWITCH) ? + rdev->rswitch->destid : rdev->destid; + p_port = prev->rswitch->route_table[dstid]; - if (p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo)) { + if (p_port != RIO_INVALID_ROUTE) { pr_debug("RIO: link failed on [%s]-P%d\n", rio_name(prev), p_port); *nrdev = prev; *npnum = p_port; rc = 0; } else - pr_debug("RIO: failed to trace route to %s\n", rio_name(prev)); + pr_debug("RIO: failed to trace route to %s\n", rio_name(rdev)); err_out: return rc; } diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index d249a1205c7d..b1af414f15e6 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -38,6 +38,7 @@ extern int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, u16 table); extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock); +extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from); /* Structures internal to the RIO core code */ extern struct device_attribute rio_dev_attrs[]; diff --git a/include/linux/rio.h b/include/linux/rio.h index 4fa5e3d2b117..0bed941f9b13 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -177,6 +177,7 @@ enum rio_phy_type { * @index: Port index, unique among all port interfaces of the same type * @sys_size: RapidIO common transport system size * @phy_type: RapidIO phy type + * @phys_efptr: RIO port extended features pointer * @name: Port name string * @priv: Master port private data */ @@ -198,6 +199,7 @@ struct rio_mport { * 1 - Large size, 65536 devices. */ enum rio_phy_type phy_type; /* RapidIO phy type */ + u32 phys_efptr; unsigned char name[40]; void *priv; /* Master port private data */ }; -- cgit v1.2.3 From 388c45ccfaeec68e334ad79edeb0b5b0a43197ff Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:35 -0700 Subject: rapidio: fix IDLE2 bits corruption RapidIO spec v.2.1 adds Idle Sequence 2 into LP-Serial Physical Layer. The fix ensures that corresponding bits are not corrupted during error handling. 
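For readers unfamiliar with the register semantics, here is a hedged sketch of the idea behind the hunk that follows (names are taken from the patch; the surrounding error-handling code is elided):

	u32 err_status;

	/* Read the per-port Error and Status CSR. */
	rio_mport_read_config_32(mport, destid, hopcount,
		rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
		&err_status);

	/*
	 * The CSR is write-1-to-clear: writing back exactly the value
	 * that was read clears only the bits that were set (including
	 * the Port-Write Pending bit), while the IDLE2 bits added by
	 * RapidIO v2.1 are left untouched. The old fixed clear mask
	 * (RIO_PORT_N_ERR_STS_CLR_MASK) could write into those bits.
	 */
	rio_mport_write_config_32(mport, destid, hopcount,
		rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
		err_status);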
Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 9 ++------- include/linux/rio_regs.h | 3 +-- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 7f18a65c4ed0..68cf0c99138a 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -871,15 +871,10 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); } - /* Clear remaining error bits */ + /* Clear remaining error bits and Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); - - /* Clear Port-Write Pending bit */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - RIO_PORT_N_ERR_STS_PW_PEND); + err_status); return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index a18b2e22aa1d..d63dcbaea169 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -229,7 +229,7 @@ #define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ -#define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ +#define RIO_PORT_N_MNT_RSP_ASTAT 0x000007e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 @@ -243,7 +243,6 @@ #define RIO_PORT_N_ERR_STS_PORT_ERR 0x00000004 #define RIO_PORT_N_ERR_STS_PORT_OK 0x00000002 #define RIO_PORT_N_ERR_STS_PORT_UNINIT 0x00000001 -#define RIO_PORT_N_ERR_STS_CLR_MASK 0x07120204 #define RIO_PORT_N_CTL_CSR(x) (0x005c + x*0x20) #define RIO_PORT_N_CTL_PWIDTH 0xc0000000 #define RIO_PORT_N_CTL_PWIDTH_1 0x00000000 -- cgit v1.2.3 From d57af9b2142f31a39dcfdeb30776baadfc802827 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:45 -0700 Subject: taskstats: use real microsecond granularity for CPU times The taskstats interface uses microsecond granularity for the user and system time values. The conversion from cputime to the taskstats values uses the cputime_to_msecs primitive which effectively limits the granularity to milliseconds. Add the cputime_to_usecs primitive for architectures that have better, more precise CPU time values. Remove cputime_to_msecs primitive because there are no more users left. 
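A worked example of the precision gain, assuming the ia64 nanosecond cputime representation shown in the diff below (the numbers themselves are illustrative):

	/*
	 * Take ct = 1234567 ns of CPU time:
	 *
	 *   before: cputime_to_msecs(ct) * USEC_PER_MSEC
	 *           = (1234567 / NSEC_PER_MSEC) * 1000 = 1000 us
	 *
	 *   after:  cputime_to_usecs(ct)
	 *           = 1234567 / NSEC_PER_USEC = 1234 us
	 *
	 * The sub-millisecond remainder is no longer rounded away
	 * before it reaches the taskstats ac_utime/ac_stime fields.
	 */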
Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Cc: Luck Tony Cc: Shailabh Nagar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/cputime.h | 6 +++--- arch/powerpc/include/asm/cputime.h | 12 ++++++------ arch/s390/include/asm/cputime.h | 10 +++++----- include/asm-generic/cputime.h | 6 +++--- kernel/tsacct.c | 10 ++++------ 5 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fa8a8594660..6073b187528a 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -56,10 +56,10 @@ typedef u64 cputime64_t; #define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ)) /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ -#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC) -#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC) +#define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC) /* * Convert cputime <-> seconds diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 8bdc6a9e5773..1cf20bdfbeca 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -124,23 +124,23 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) } /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ extern u64 __cputime_msec_factor; -static inline unsigned long cputime_to_msecs(const cputime_t ct) +static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor); + return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; } -static inline cputime_t msecs_to_cputime(const unsigned long ms) +static inline cputime_t usecs_to_cputime(const unsigned long us) { cputime_t ct; unsigned long sec; /* have to be a little careful about overflow */ - ct = ms % 1000; - sec = ms / 1000; + ct = us % 1000000; + sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; do_div(ct, 1000); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8b1a52a137c5..40e2ab0fa3f0 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -73,18 +73,18 @@ cputime64_to_jiffies64(cputime64_t cputime) } /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ static inline unsigned int -cputime_to_msecs(const cputime_t cputime) +cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096000); + return cputime_div(cputime, 4096); } static inline cputime_t -msecs_to_cputime(const unsigned int m) +usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096000; + return (cputime_t) m * 4096; } /* diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index ca0f239f0e13..2bcc5c7c22a6 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -33,10 +33,10 @@ typedef u64 cputime64_t; /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. 
*/ -#define cputime_to_msecs(__ct) jiffies_to_msecs(__ct) -#define msecs_to_cputime(__msecs) msecs_to_jiffies(__msecs) +#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct); +#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs); /* * Convert cputime to seconds and back. diff --git a/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; - stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; - stats->ac_utimescaled = - cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; - stats->ac_stimescaled = - cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; + stats->ac_utime = cputime_to_usecs(tsk->utime); + stats->ac_stime = cputime_to_usecs(tsk->stime); + stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); + stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; -- cgit v1.2.3 From 144ecf310eb52d9df607b9b7eeb096743e232a96 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 27 Oct 2010 15:34:50 -0700 Subject: kfifo: fix kfifo_alloc() to return a signed int value Add a new __kfifo_int_must_check_helper() helper function, which is needed for kfifo_alloc() to return the right signed integer value. The original __kfifo_must_check_helper() helper was renamed to __kfifo_uint_must_check_helper() to show the signedness that is expected and returned. (And revert the temporary disabling of __kfifo_must_check_helper()) Signed-off-by: Stefani Seibold Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c238ad2f82ea..10308c6a3d1c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -171,8 +171,17 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); } -/* __kfifo_must_check_helper() is temporarily disabled because it was faulty */ -#define __kfifo_must_check_helper(x) (x) +static inline unsigned int __must_check +__kfifo_uint_must_check_helper(unsigned int val) +{ + return val; +} + +static inline int __must_check +__kfifo_int_must_check_helper(int val) +{ + return val; +} /** * kfifo_initialized - Check if the fifo is initialized @@ -264,7 +273,7 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); * @fifo: address of the fifo to be used */ #define kfifo_avail(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ @@ -297,7 +306,7 @@ * This function returns the size of the next fifo record in number of bytes. */ #define kfifo_peek_len(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ @@ -320,7 +329,7 @@ * Return 0 if no error, otherwise an error code.
*/ #define kfifo_alloc(fifo, size, gfp_mask) \ -__kfifo_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -416,7 +425,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_get(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -457,7 +466,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_peek(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -549,7 +558,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_out(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ @@ -577,7 +586,7 @@ __kfifo_must_check_helper( \ * copied. */ #define kfifo_out_spinlocked(fifo, buf, n, lock) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ unsigned long __flags; \ unsigned int __ret; \ @@ -606,7 +615,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_from_user(fifo, from, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ @@ -634,7 +643,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ @@ -761,7 +770,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_out_peek(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ -- cgit v1.2.3 From c3b92ce9e75f6353104fc7f8e32fb9fdb2550ad0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Wed, 27 Oct 2010 15:34:52 -0700 Subject: ramoops: use the platform data structure instead of module params As each board and system has different memory for ramoops, it's better to define the platform data instead of module params.
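With this in place, a board file can describe its ramoops region at build time. A hedged sketch follows (the "ramoops" device name matches the driver below; the size and reserved address are made-up values for illustration):

	#include <linux/platform_device.h>
	#include <linux/ramoops.h>

	/* hypothetical reserved RAM window for this board */
	static struct ramoops_platform_data board_ramoops_data = {
		.mem_size	= 128 * 1024,
		.mem_address	= 0x8f000000,
	};

	static struct platform_device board_ramoops_device = {
		.name	= "ramoops",
		.id	= -1,
		.dev	= {
			.platform_data = &board_ramoops_data,
		},
	};

	/* called from the board's init code */
	platform_device_register(&board_ramoops_device);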
[akpm@linux-foundation.org: fix ramoops_remove() return type] Signed-off-by: Kyungmin Park Cc: Marco Stornelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ramoops.c | 30 ++++++++++++++++++++++++++++-- include/linux/ramoops.h | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 include/linux/ramoops.h (limited to 'include') diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index 74f00b5ffa36..73dcb0ee41fd 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #define RAMOOPS_KERNMSG_HDR "====" #define RAMOOPS_HEADER_SIZE (5 + sizeof(struct timeval)) @@ -91,11 +93,17 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper, cxt->count = (cxt->count + 1) % cxt->max_count; } -static int __init ramoops_init(void) +static int __init ramoops_probe(struct platform_device *pdev) { + struct ramoops_platform_data *pdata = pdev->dev.platform_data; struct ramoops_context *cxt = &oops_cxt; int err = -EINVAL; + if (pdata) { + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + } + if (!mem_size) { printk(KERN_ERR "ramoops: invalid size specification"); goto fail3; @@ -142,7 +150,7 @@ fail3: return err; } -static void __exit ramoops_exit(void) +static int __exit ramoops_remove(struct platform_device *pdev) { struct ramoops_context *cxt = &oops_cxt; @@ -151,8 +159,26 @@ static void __exit ramoops_exit(void) iounmap(cxt->virt_addr); release_mem_region(cxt->phys_addr, cxt->size); + return 0; } +static struct platform_driver ramoops_driver = { + .remove = __exit_p(ramoops_remove), + .driver = { + .name = "ramoops", + .owner = THIS_MODULE, + }, +}; + +static int __init ramoops_init(void) +{ + return platform_driver_probe(&ramoops_driver, ramoops_probe); +} + +static void __exit ramoops_exit(void) +{ + platform_driver_unregister(&ramoops_driver); +} module_init(ramoops_init); module_exit(ramoops_exit); diff --git a/include/linux/ramoops.h b/include/linux/ramoops.h new file mode 100644 index 000000000000..0ae68a2c1212 --- /dev/null +++ b/include/linux/ramoops.h @@ -0,0 +1,15 @@ +#ifndef __RAMOOPS_H +#define __RAMOOPS_H + +/* + * Ramoops platform data + * @mem_size memory size for ramoops + * @mem_address physical memory address to contain ramoops + */ + +struct ramoops_platform_data { + unsigned long mem_size; + unsigned long mem_address; +}; + +#endif -- cgit v1.2.3 From 61d8e11e519ee7912ab59610fba1aaf08e3c1d84 Mon Sep 17 00:00:00 2001 From: Zimny Lech Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: Remove duplicate includes from many files Signed-off-by: Zimny Lech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-tegra/timer.c | 1 - arch/arm/plat-nomadik/include/plat/ste_dma40.h | 1 - arch/tile/kernel/setup.c | 2 -- arch/x86/mm/init_64.c | 1 - arch/x86/xen/enlighten.c | 1 - drivers/media/IR/lirc_dev.c | 1 - drivers/platform/x86/intel_pmic_gpio.c | 1 - include/linux/virtio_9p.h | 1 - kernel/trace/trace_kprobe.c | 1 - 9 files changed, 10 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 2f420210d406..9057d6fd1d31 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 5fbde4b8dc12..93a812672d9a 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ 
b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -14,7 +14,6 @@ #include #include #include -#include /* dev types for memcpy */ #define STEDMA40_DEV_DST_MEMORY (-1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f3a50e74f9a4..ae51cad12da0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include #include #include -#include static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44ab12dc2a12..0cd12db0b142 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c index 0acf6396e068..202581808bdc 100644 --- a/drivers/media/IR/lirc_dev.c +++ b/drivers/media/IR/lirc_dev.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index f540ff96c53f..e61db9dfebef 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 1faa80d92f05..e68b439b2860 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -5,7 +5,6 @@ #include #include #include -#include /* The feature bitmap for virtio 9P */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8d2852baa4a..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "trace.h" -- cgit v1.2.3 From 95aac7b1cd224f568fb83937044cd303ff11b029 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Wed, 27 Oct 2010 15:34:54 -0700 Subject: epoll: make epoll_wait() use the hrtimer range feature This makes epoll use hrtimers for the timeout value, which prevents epoll_wait() from timing out up to a millisecond early. This mirrors the behavior of select() and poll().
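A hedged user-space sketch (not part of the patch) that demonstrates the behavior being fixed; with no file descriptors registered, epoll_wait() always runs to its timeout:

	#include <stdio.h>
	#include <sys/epoll.h>
	#include <time.h>

	int main(void)
	{
		int epfd = epoll_create(1);
		struct epoll_event ev;
		struct timespec t0, t1;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		epoll_wait(epfd, &ev, 1, 10);	/* 10 ms timeout */
		clock_gettime(CLOCK_MONOTONIC, &t1);

		/*
		 * With hrtimers the elapsed time is at least the requested
		 * 10 ms (plus the estimated slack); with the old jiffies
		 * conversion it could return up to one tick early.
		 */
		printf("slept %ld ns\n",
		       (t1.tv_sec - t0.tv_sec) * 1000000000L +
		       (t1.tv_nsec - t0.tv_nsec));
		return 0;
	}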
Signed-off-by: Shawn Bohrer Cc: Al Viro Acked-by: Davide Libenzi Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 35 +++++++++++++++++++---------------- fs/select.c | 2 +- include/linux/poll.h | 2 ++ 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 256bb7bb102a..8cf07242067d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -77,9 +77,6 @@ /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 -/* Maximum msec timeout value storeable in a long int */ -#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) - #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) @@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail; + int res, eavail, timed_out = 0; unsigned long flags; - long jtimeout; + long slack; wait_queue_t wait; - - /* - * Calculate the timeout by checking for the "infinite" value (-1) - * and the overflow condition. The passed timeout is in milliseconds, - * that why (t * HZ) / 1000. - */ - jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? - MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; + struct timespec end_time; + ktime_t expires, *to = NULL; + + if (timeout > 0) { + ktime_get_ts(&end_time); + timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + slack = select_estimate_accuracy(&end_time); + to = &expires; + *to = timespec_to_ktime(end_time); + } else if (timeout == 0) { + timed_out = 1; + } retry: spin_lock_irqsave(&ep->lock, flags); @@ -1150,7 +1151,7 @@ retry: * to TASK_INTERRUPTIBLE before doing the checks. */ set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&ep->rdllist) || !jtimeout) + if (!list_empty(&ep->rdllist) || timed_out) break; if (signal_pending(current)) { res = -EINTR; @@ -1158,7 +1159,9 @@ retry: } spin_unlock_irqrestore(&ep->lock, flags); - jtimeout = schedule_timeout(jtimeout); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1176,7 +1179,7 @@ retry: * more luck. 
*/ if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto retry; return res; diff --git a/fs/select.c b/fs/select.c index 5f023f911202..b7b10aa30861 100644 --- a/fs/select.c +++ b/fs/select.c @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long select_estimate_accuracy(struct timespec *tv) +long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; diff --git a/include/linux/poll.h b/include/linux/poll.h index 600cc1fde64d..56e76af78102 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); +extern long select_estimate_accuracy(struct timespec *tv); + static inline int poll_schedule(struct poll_wqueues *pwq, int state) { -- cgit v1.2.3 From 39e3ac2599a5f9aba499b5f8af809108e70a6163 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 27 Oct 2010 21:25:12 -0400 Subject: jbd2: Fix I/O hang in jbd2_journal_release_jbd_inode This fixes a hang seen in jbd2_journal_release_jbd_inode on a lot of Power 6 systems running with ext4. When we get in the hung state, all I/O to the disk in question gets blocked where we stay indefinitely. Looking at the task list, I can see we are stuck in jbd2_journal_release_jbd_inode waiting on a wake up. I added some debug code to detect this scenario and dump additional data if we were stuck in jbd2_journal_release_jbd_inode for longer than 30 minutes. When it hit, I was able to see that i_flags was 0, suggesting we missed the wake up. This patch changes i_flags to be an unsigned long, uses bit operators to access it, and adds barriers around the accesses. Prior to applying this patch, we were regularly hitting this hang on numerous systems in our test environment. After applying the patch, the hangs no longer occur. Signed-off-by: Brian King Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 12 ++++++++---- fs/jbd2/journal.c | 4 +++- include/linux/jbd2.h | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c068c189d80..6494c81e3b0a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -26,7 +26,9 @@ #include #include #include +#include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -236,7 +238,7 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { mapping = jinode->i_vfs_inode->i_mapping; - jinode->i_flags |= JI_COMMIT_RUNNING; + set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); spin_unlock(&journal->j_list_lock); /* * submit the inode data buffers. 
We use writepage @@ -251,7 +253,8 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); commit_transaction->t_flushed_data_blocks = 1; - jinode->i_flags &= ~JI_COMMIT_RUNNING; + clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); + smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } spin_unlock(&journal->j_list_lock); @@ -272,7 +275,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - jinode->i_flags |= JI_COMMIT_RUNNING; + set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); spin_unlock(&journal->j_list_lock); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); if (err) { @@ -288,7 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, ret = err; } spin_lock(&journal->j_list_lock); - jinode->i_flags &= ~JI_COMMIT_RUNNING; + clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); + smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..75e1b5a0bc2d 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -42,12 +42,14 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include #include #include +#include EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_stop); @@ -2206,7 +2208,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal, restart: spin_lock(&journal->j_list_lock); /* Is commit writing out inode - we have to wait */ - if (jinode->i_flags & JI_COMMIT_RUNNING) { + if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0b52924a0cb6..2ae86aa21fce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -395,7 +395,7 @@ struct jbd2_inode { struct inode *i_vfs_inode; /* Flags of inode [j_list_lock] */ - unsigned int i_flags; + unsigned long i_flags; }; struct jbd2_revoke_table_s; -- cgit v1.2.3 From b853fd364810a241050778124842a8c415c72a69 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Wed, 27 Oct 2010 21:27:12 -0400 Subject: ext4: avoid null dereference in trace_ext4_mballoc_discard ac->inode is set to null in function ext4_mb_release_group_pa(), and then trace_ext4_mballoc_discard(ac) is called, the kernel will panic. BUG: unable to handle kernel NULL pointer dereference at 000000a4 IP: [] ftrace_raw_event_ext4__mballoc+0x54/0xc0 [ext4] *pdpt = 0000000000abd001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Pid: 550, comm: flush-8:16 Not tainted 2.6.36-rc1 #1 SE7320EP2/Altos G530 EIP: 0060:[] EFLAGS: 00010206 CPU: 1 EIP is at ftrace_raw_event_ext4__mballoc+0x54/0xc0 [ext4] EAX: f32ac840 EBX: f3f1cf88 ECX: f32ac840 EDX: 00000000 ESI: f32ac83c EDI: f880b9d8 EBP: 00000000 ESP: f4b77ae4 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process flush-8:16 (pid: 550, ti=f4b76000 task=f613e540 task.ti=f4b76000) Call Trace: [] ? ext4_mb_release_group_pa+0x121/0x150 [ext4] [] ? ext4_mb_discard_group_preallocations+0x336/0x400 [ext4] [] ? ext4_mb_new_blocks+0x3d1/0x4f0 [ext4] [] ? __make_request+0x10b/0x440 [] ? ext4_ext_map_blocks+0x1334/0x1980 [ext4] [] ? rb_reserve_next_event+0xaa/0x3b0 [] ? ext4_map_blocks+0xd6/0x1d0 [ext4] [] ? 
mpage_da_map_blocks+0xc7/0x8a0 [ext4] [] ? find_get_pages_tag+0x38/0x110 [] ? __pagevec_release+0x15/0x20 [] ? ext4_da_writepages+0x2b5/0x5d0 [ext4] [] ? __writepage+0x0/0x30 [] ? do_writepages+0x14/0x30 [] ? writeback_single_inode+0xa0/0x240 [] ? writeback_sb_inodes+0xc1/0x180 [] ? writeback_inodes_wb+0x88/0x140 [] ? wb_writeback+0x20b/0x320 [] ? lock_timer_base+0x27/0x50 [] ? wb_do_writeback+0x150/0x190 [] ? bdi_writeback_thread+0x88/0x1f0 [] ? complete+0x40/0x60 [] ? bdi_writeback_thread+0x0/0x1f0 [] ? kthread+0x74/0x80 [] ? kthread+0x0/0x80 [] ? kernel_thread_helper+0x6/0x10 Signed-off-by: Wen Congyang Acked-by: Steven Rostedt Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 01e9e0076a92..6a1fcff95f7c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -796,8 +796,9 @@ DECLARE_EVENT_CLASS(ext4__mballoc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; - __entry->ino = ac->ac_inode->i_ino; + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode ? + ac->ac_inode->i_ino : 0; __entry->result_logical = ac->ac_b_ex.fe_logical; __entry->result_start = ac->ac_b_ex.fe_start; __entry->result_group = ac->ac_b_ex.fe_group; -- cgit v1.2.3 From e6fa0be699449d28a20e815bfe9ce26725ec4962 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:04 -0400 Subject: Add helper function for blkdev_issue_zeroout (sb_issue_discard) This is done the same way as helper sb_issue_discard for blkdev_issue_discard. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- include/linux/blkdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..e5cb4d029689 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -941,6 +941,14 @@ static inline int sb_issue_discard(struct super_block *sb, return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } +static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) +{ + return blkdev_issue_zeroout(sb->s_bdev, + block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); +} extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 4d5476164a052e80d4ef430e368e76dbde96801f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:07 -0400 Subject: ext4: fix oops in trace_ext4_mb_release_group_pa Our QA reported an oops in the ext4_mb_release_group_pa tracing, and Josef Bacik pointed out that it was because we may have a non-null but uninitialized ac_inode in the allocation context. I can reproduce it when running xfstests with ext4 tracepoints on, on a CONFIG_SLAB_DEBUG kernel. We call trace_ext4_mb_release_group_pa from 2 places, ext4_mb_discard_group_preallocations and ext4_mb_discard_lg_preallocations In both cases we allocate an ac as a container just for tracing (!) and never fill in the ac_inode. There's no reason to be assigning, testing, or printing it as far as I can see, so just remove it from the tracepoint. 
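Taken together with the previous tracepoint fix, these two commits illustrate a general rule: a TP_fast_assign() body must treat caller-supplied pointers as untrusted, guarding against NULL, and when the structure may be entirely uninitialized (as here) the only safe fix is to stop reading it. A hedged distillation of the guarded form used by the earlier fix:

	TP_fast_assign(
		__entry->dev	= sb->s_dev;
		/* group PAs carry no inode; never dereference blindly */
		__entry->ino	= inode ? inode->i_ino : 0;
	),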
Signed-off-by: Eric Sandeen Reviewed-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6a1fcff95f7c..fbdfa3a6bbd8 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -432,7 +432,6 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_STRUCT__entry( __field( dev_t, dev ) - __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) @@ -440,8 +439,6 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->ino = (ac && ac->ac_inode) ? - ac->ac_inode->i_ino : 0; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), -- cgit v1.2.3 From 3e1e5f501632460184a98237d5460c521510535e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:07 -0400 Subject: ext4: don't use ext4_allocation_contexts for tracing Many tracepoints were populating an ext4_allocation_context to pass in, but this requires a slab allocation even when tracepoints are off. In fact, 4 of 5 of these allocations were only for tracing. In addition, we were only using a small fraction of the 144 bytes of this structure for this purpose. We can do away with all these alloc/frees of the ac and simply pass in the bits we care about, instead. I tested this by turning on tracing and running through xfstests on x86_64. I did not actually do anything with the trace output, however. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 81 +++++++-------------------------------------- include/trace/events/ext4.h | 51 ++++++++++++++++------------ 2 files changed, 41 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ccdfec6acb75..3f62df50f483 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3591,8 +3591,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) */ static noinline_for_stack int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa, - struct ext4_allocation_context *ac) + struct ext4_prealloc_space *pa) { struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -3610,11 +3609,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = pa->pa_inode; - } - while (bit < end) { bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); if (bit >= end) @@ -3625,16 +3619,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, (unsigned) next - bit, (unsigned) group); free += next - bit; - if (ac) { - ac->ac_b_ex.fe_group = group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = next - bit; - ac->ac_b_ex.fe_logical = 0; - trace_ext4_mballoc_discard(ac); - } - - trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, - next - bit); + trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); + trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, + grp_blk_start + bit, next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3657,29 +3644,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, static noinline_for_stack int ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa, - struct 
ext4_allocation_context *ac) + struct ext4_prealloc_space *pa) { struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - trace_ext4_mb_release_group_pa(sb, ac, pa); + trace_ext4_mb_release_group_pa(sb, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); - - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = NULL; - ac->ac_b_ex.fe_group = group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = pa->pa_len; - ac->ac_b_ex.fe_logical = 0; - trace_ext4_mballoc_discard(ac); - } + trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); return 0; } @@ -3700,7 +3677,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; struct list_head list; struct ext4_buddy e4b; int err; @@ -3729,9 +3705,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) - ac->ac_sb = sb; repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3786,9 +3759,9 @@ repeat: spin_unlock(pa->pa_obj_lock); if (pa->pa_type == MB_GROUP_PA) - ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_mb_release_group_pa(&e4b, pa); else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); @@ -3796,8 +3769,6 @@ repeat: out: ext4_unlock_group(sb, group); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); return free; @@ -3818,7 +3789,6 @@ void ext4_discard_preallocations(struct inode *inode) struct super_block *sb = inode->i_sb; struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; ext4_group_t group = 0; struct list_head list; struct ext4_buddy e4b; @@ -3834,11 +3804,6 @@ void ext4_discard_preallocations(struct inode *inode) INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = inode; - } repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -3908,7 +3873,7 @@ repeat: ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -3917,8 +3882,6 @@ repeat: list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -4116,14 +4079,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, struct ext4_buddy e4b; struct list_head discard_list; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; mb_debug(1, "discard locality group preallocation\n"); INIT_LIST_HEAD(&discard_list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) - ac->ac_sb = sb; spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], @@ -4175,15 +4134,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, } ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - 
ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_mb_release_group_pa(&e4b, pa); ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -4547,7 +4504,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; - struct ext4_allocation_context *ac = NULL; struct ext4_group_desc *gdp; unsigned long freed = 0; unsigned int overflow; @@ -4602,12 +4558,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, if (!ext4_should_writeback_data(inode)) flags |= EXT4_FREE_BLOCKS_METADATA; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { - ac->ac_inode = inode; - ac->ac_sb = sb; - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -4665,12 +4615,7 @@ do_more: BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); } #endif - if (ac) { - ac->ac_b_ex.fe_group = block_group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = count; - trace_ext4_mballoc_free(ac); - } + trace_ext4_mballoc_free(sb, inode, block_group, bit, count); err = ext4_mb_load_buddy(sb, block_group, &e4b); if (err) @@ -4741,7 +4686,5 @@ error_return: dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); return; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index fbdfa3a6bbd8..b5f4938d612d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -396,11 +396,11 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, TRACE_EVENT(ext4_mb_release_inode_pa, TP_PROTO(struct super_block *sb, - struct ext4_allocation_context *ac, + struct inode *inode, struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), - TP_ARGS(sb, ac, pa, block, count), + TP_ARGS(sb, inode, pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) @@ -412,8 +412,7 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->ino = (ac && ac->ac_inode) ? - ac->ac_inode->i_ino : 0; + __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; ), @@ -425,10 +424,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TRACE_EVENT(ext4_mb_release_group_pa, TP_PROTO(struct super_block *sb, - struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), - TP_ARGS(sb, ac, pa), + TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -779,47 +777,56 @@ TRACE_EVENT(ext4_mballoc_prealloc, ); DECLARE_EVENT_CLASS(ext4__mballoc, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac), + TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( - __entry->dev = ac->ac_sb->s_dev; - __entry->ino = ac->ac_inode ? - ac->ac_inode->i_ino : 0; - __entry->result_logical = ac->ac_b_ex.fe_logical; - __entry->result_start = ac->ac_b_ex.fe_start; - __entry->result_group = ac->ac_b_ex.fe_group; - __entry->result_len = ac->ac_b_ex.fe_len; + __entry->dev = sb->s_dev; + __entry->ino = inode ? 
inode->i_ino : 0; + __entry->result_start = start; + __entry->result_group = group; + __entry->result_len = len; ), - TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + TP_printk("dev %s inode %lu extent %u/%d/%u ", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, - __entry->result_len, __entry->result_logical) + __entry->result_len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac) + TP_ARGS(sb, inode, group, start, len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac) + TP_ARGS(sb, inode, group, start, len) ); TRACE_EVENT(ext4_forget, -- cgit v1.2.3 From 367a51a339020ba4d9edb0ce0f21d65bd50b00c9 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:11 -0400 Subject: fs: Add FITRIM ioctl Adds a filesystem-independent ioctl to allow implementation of file system batched discard support. It takes an fstrim_range structure as an argument. fstrim_range is defined in include/linux/fs.h and its definition is as follows. struct fstrim_range { start; len; minlen; } start - first Byte to trim len - number of Bytes to trim from start minlen - minimum extent length to trim, free extents shorter than this number of Bytes will be ignored. This will be rounded up to fs block size. It is also possible to specify NULL as an argument. In this case the arguments will be set as follows: start = 0; len = ULLONG_MAX; minlen = 0; So it will trim the whole file system in one run. After the FITRIM is done, the number of actually discarded Bytes is stored in fstrim_range.len to give the user better insight into how much storage space has really been released for wear-leveling. Signed-off-by: Lukas Czerner Reviewed-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ioctl.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 8 ++++++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/fs/ioctl.c b/fs/ioctl.c index f855ea4fc888..e92fdbb3bc3a 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } +static int ioctl_fstrim(struct file *filp, void __user *argp) +{ + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* If filesystem doesn't support trim feature, return. */ + if (sb->s_op->trim_fs == NULL) + return -EOPNOTSUPP; + + /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ + if (sb->s_bdev == NULL) + return -EINVAL; + + if (argp == NULL) { + range.start = 0; + range.len = ULLONG_MAX; + range.minlen = 0; + } else if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + + ret = sb->s_op->trim_fs(sb, &range); + if (ret < 0) + return ret; + + if ((argp != NULL) && + (copy_to_user(argp, &range, sizeof(range)))) + return -EFAULT; + + return 0; +} + /* * When you add any new common ioctls to the switches above and below * please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, error = ioctl_fsthaw(filp); break; + case FITRIM: + error = ioctl_fstrim(filp, argp); + break; + case FS_IOC_FIEMAP: return ioctl_fiemap(filp, arg); diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..7008268e9b5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -32,6 +32,12 @@ #define SEEK_END 2 /* seek relative to end of file */ #define SEEK_MAX SEEK_END +struct fstrim_range { + uint64_t start; + uint64_t len; + uint64_t minlen; +}; + /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { int nr_files; /* read only */ @@ -316,6 +322,7 @@ struct inodes_stat_t { #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ #define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) @@ -1581,6 +1588,7 @@ struct super_operations { ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); + int (*trim_fs) (struct super_block *, struct fstrim_range *); }; /* -- cgit v1.2.3 From 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: implement writeback livelock avoidance using page tagging This is analogous to Jan Kara's commit, f446daaea9d4a420d16c606f755f3689dcb2d0ce mm: implement writeback livelock avoidance using page tagging but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved to there) If you start a large buffered IO to a file, and then set fsync after it, you'll find that fsync does not complete until the other IO stops. If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words it has synced and re-synced pages in the file multiple times. This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file... With the following patch, we only sync as much as was dirty at the time of the sync. 
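The core of the avoidance is a two-pass scheme: for data-integrity writeback, tag every page that is dirty now, then write back only tagged pages, leaving anything dirtied later for the next pass. A minimal sketch of the pattern the hunks below implement (surrounding ext4 code omitted):

	int tag;

	/* Snapshot the currently-dirty pages for WB_SYNC_ALL by tagging
	 * them TOWRITE; pages dirtied after this point carry only the
	 * DIRTY tag, so a continuously re-dirtied file can no longer
	 * livelock fsync. */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		tag_pages_for_writeback(mapping, index, end);
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}

	while (!done && (index <= end)) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
				min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;
		/* ... lock, write out and release each page ... */
	}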
Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 18 +++++++++++++++--- include/linux/writeback.h | 2 ++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6671fcbb5293..c9ea95ba5fde 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ long nr_to_write = wbc->nr_to_write; + int tag; pagevec_init(&pvec, 0); index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->sync_mode == WB_SYNC_ALL) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; + while (!done && (index <= end)) { int i; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break; @@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping, long desired_nr_to_write, nr_to_writebump = 0; loff_t range_start = wbc->range_start; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + pgoff_t end; trace_ext4_da_writepages(inode, wbc); @@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->range_start = index << PAGE_CACHE_SHIFT; wbc->range_end = LLONG_MAX; wbc->range_cyclic = 0; - } else + end = -1; + } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + } /* * This works around two forms of stupidity. The first is in @@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping, pages_skipped = wbc->pages_skipped; retry: + if (wbc->sync_mode == WB_SYNC_ALL) + tag_pages_for_writeback(mapping, index, end); + while (!ret && wbc->nr_to_write > 0) { /* diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); -- cgit v1.2.3 From 7f93cff90fa9be6ed45f6189e136153d1d8631b0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: fix kernel oops if the journal superblock has a non-zero j_errno Commit 84061e0 fixed an accounting bug only to introduce the possibility of a kernel OOPS if the journal has a non-zero j_errno field indicating that the file system had detected a fs inconsistency. After the journal replay, if the journal superblock indicates that the file system has an error, this indication is transferred to the file system and then ext4_commit_super() is called to write this to the disk. But since the percpu counters are now initialized after the journal replay, the call to ext4_commit_super() will cause a kernel oops since it needs to use the percpu counters in the ext4 superblock structure. The fix is to skip setting the ext4 free block and free inode fields if the percpu counter has not been set. Thanks to Ken Sumrall for reporting and analyzing the root causes of this bug.
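The failing order and the guard, schematically (a sketch assembled from the change below; call sites abbreviated):

	/* With a journal whose j_errno is set:
	 *
	 *   journal replay        -> error is transferred to the superblock
	 *   ext4_commit_super()   -> percpu_counter_sum_positive() dereferences
	 *                            fbc->counters, which is still NULL
	 *   percpu counter init   -> only happens later during mount
	 *
	 * The guarded form is safe in either order: */
	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
		ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeblocks_counter));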
Addresses-Google-Bug: #3054080 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 7 +++++-- include/linux/percpu_counter.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ce3b67b7269..c9e06c647ce8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3964,9 +3964,12 @@ static int ext4_commit_super(struct super_block *sb, int sync) else es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - ext4_free_blocks_count_set(es, percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) + ext4_free_blocks_count_set(es, percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeblocks_counter)); - es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + es->s_free_inodes_count = + cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 8a7d510ffa9c..46f6ba56fa91 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,6 +78,11 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) return 1; } +static inline int percpu_counter_initialized(struct percpu_counter *fbc) +{ + return (fbc->counters != NULL); +} + #else struct percpu_counter { @@ -143,6 +148,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } +static inline int percpu_counter_initialized(struct percpu_counter *fbc) +{ + return 1; +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) -- cgit v1.2.3 From a269029d0e2192046be4c07ed78a45022469ee4c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 22:08:50 -0400 Subject: ext4,jbd2: convert tracepoints to use major/minor numbers Unfortunately perf can't deal with anything other than direct structure accesses in the TP_printk() section. It will drop dead when it sees jbd2_dev_to_name() in the "print fmt" section of the tracepoint. 
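Every converted event follows the same mechanical pattern; in sketch form (event-specific fields elided):

	TP_STRUCT__entry(
		__field( int,	dev_major	)
		__field( int,	dev_minor	)
	),

	TP_fast_assign(
		/* split the dev_t once, at trace time */
		__entry->dev_major = MAJOR(sb->s_dev);
		__entry->dev_minor = MINOR(sb->s_dev);
	),

	/* the "print fmt" is now plain integer formatting, which perf
	 * can evaluate, instead of a jbd2_dev_to_name() function call */
	TP_printk("dev %d,%d ...", __entry->dev_major, __entry->dev_minor)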
Addresses-Google-Bug: 3138508 Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 329 +++++++++++++++++++++++++++----------------- include/trace/events/jbd2.h | 78 ++++++----- 2 files changed, 251 insertions(+), 156 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b5f4938d612d..8f59db107bbb 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -21,7 +21,8 @@ TRACE_EVENT(ext4_free_inode, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( uid_t, uid ) @@ -30,7 +31,8 @@ TRACE_EVENT(ext4_free_inode, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->uid = inode->i_uid; @@ -38,9 +40,10 @@ TRACE_EVENT(ext4_free_inode, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, + TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid, (unsigned long long) __entry->blocks) ); @@ -50,20 +53,22 @@ TRACE_EVENT(ext4_request_inode, TP_ARGS(dir, mode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, dir ) __field( umode_t, mode ) ), TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; + __entry->dev_major = MAJOR(dir->i_sb->s_dev); + __entry->dev_minor = MINOR(dir->i_sb->s_dev); __entry->dir = dir->i_ino; __entry->mode = mode; ), - TP_printk("dev %s dir %lu mode 0%o", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, - __entry->mode) + TP_printk("dev %d,%d dir %lu mode 0%o", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, @@ -72,21 +77,24 @@ TRACE_EVENT(ext4_allocate_inode, TP_ARGS(inode, dir, mode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( ino_t, dir ) __field( umode_t, mode ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->dir = dir->i_ino; __entry->mode = mode; ), - TP_printk("dev %s ino %lu dir %lu mode 0%o", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); @@ -98,7 +106,8 @@ DECLARE_EVENT_CLASS(ext4__write_begin, TP_ARGS(inode, pos, len, flags), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) @@ -106,15 +115,17 @@ DECLARE_EVENT_CLASS(ext4__write_begin, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->flags = flags; ), - TP_printk("dev %s ino %lu pos %llu len %u flags 
%u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) ); @@ -141,7 +152,8 @@ DECLARE_EVENT_CLASS(ext4__write_end, TP_ARGS(inode, pos, len, copied), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) @@ -149,16 +161,18 @@ DECLARE_EVENT_CLASS(ext4__write_end, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->copied = copied; ), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pos, + __entry->len, __entry->copied) ); DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, @@ -199,21 +213,23 @@ TRACE_EVENT(ext4_writepage, TP_ARGS(inode, page), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( pgoff_t, index ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->index = page->index; ), - TP_printk("dev %s ino %lu page_index %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->index) + TP_printk("dev %d,%d ino %lu page_index %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->index) ); TRACE_EVENT(ext4_da_writepages, @@ -222,7 +238,8 @@ TRACE_EVENT(ext4_da_writepages, TP_ARGS(inode, wbc), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( long, nr_to_write ) __field( long, pages_skipped ) @@ -236,7 +253,8 @@ TRACE_EVENT(ext4_da_writepages, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; @@ -249,8 +267,8 @@ TRACE_EVENT(ext4_da_writepages, __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", - jbd2_dev_to_name(__entry->dev), + TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", + __entry->dev_major, __entry->dev_minor, (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, @@ -265,7 +283,8 @@ TRACE_EVENT(ext4_da_write_pages, TP_ARGS(inode, mpd), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, b_blocknr ) __field( __u32, b_size ) @@ -276,7 +295,8 @@ TRACE_EVENT(ext4_da_write_pages, ), TP_fast_assign( - 
__entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->b_blocknr = mpd->b_blocknr; __entry->b_size = mpd->b_size; @@ -286,8 +306,9 @@ TRACE_EVENT(ext4_da_write_pages, __entry->pages_written = mpd->pages_written; ), - TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->b_blocknr, __entry->b_size, __entry->b_state, __entry->first_page, __entry->io_done, __entry->pages_written) @@ -300,7 +321,8 @@ TRACE_EVENT(ext4_da_writepages_result, TP_ARGS(inode, wbc, ret, pages_written), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( int, ret ) __field( int, pages_written ) @@ -310,7 +332,8 @@ TRACE_EVENT(ext4_da_writepages_result, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->ret = ret; __entry->pages_written = pages_written; @@ -319,8 +342,8 @@ TRACE_EVENT(ext4_da_writepages_result, __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu", - jbd2_dev_to_name(__entry->dev), + TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu", + __entry->dev_major, __entry->dev_minor, (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->more_io, @@ -334,20 +357,23 @@ TRACE_EVENT(ext4_discard_blocks, TP_ARGS(sb, blk, count), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u64, blk ) __field( __u64, count ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->blk = blk; __entry->count = count; ), - TP_printk("dev %s blk %llu count %llu", - jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count) + TP_printk("dev %d,%d blk %llu count %llu", + __entry->dev_major, __entry->dev_minor, + __entry->blk, __entry->count) ); DECLARE_EVENT_CLASS(ext4__mb_new_pa, @@ -357,7 +383,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa, TP_ARGS(ac, pa), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) @@ -366,16 +393,18 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa, ), TP_fast_assign( - __entry->dev = ac->ac_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; __entry->pa_lstart = pa->pa_lstart; ), - TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) + TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pa_pstart, + __entry->pa_len, __entry->pa_lstart) 
); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, @@ -403,7 +432,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TP_ARGS(sb, inode, pa, block, count), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, block ) __field( __u32, count ) @@ -411,15 +441,16 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; ), - TP_printk("dev %s ino %lu block %llu count %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->block, __entry->count) + TP_printk("dev %d,%d ino %lu block %llu count %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, @@ -429,20 +460,23 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_ARGS(sb, pa), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), - TP_printk("dev %s pstart %llu len %u", - jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len) + TP_printk("dev %d,%d pstart %llu len %u", + __entry->dev_major, __entry->dev_minor, + __entry->pa_pstart, __entry->pa_len) ); TRACE_EVENT(ext4_discard_preallocations, @@ -451,18 +485,21 @@ TRACE_EVENT(ext4_discard_preallocations, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mb_discard_preallocations, @@ -471,18 +508,20 @@ TRACE_EVENT(ext4_mb_discard_preallocations, TP_ARGS(sb, needed), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, needed ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->needed = needed; ), - TP_printk("dev %s needed %d", - jbd2_dev_to_name(__entry->dev), __entry->needed) + TP_printk("dev %d,%d needed %d", + __entry->dev_major, __entry->dev_minor, __entry->needed) ); TRACE_EVENT(ext4_request_blocks, @@ -491,7 +530,8 @@ TRACE_EVENT(ext4_request_blocks, TP_ARGS(ar), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( unsigned int, flags ) __field( unsigned int, len ) @@ -504,7 +544,8 @@ TRACE_EVENT(ext4_request_blocks, ), TP_fast_assign( - __entry->dev = ar->inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev); __entry->ino = ar->inode->i_ino; __entry->flags = ar->flags; __entry->len = ar->len; @@ -516,8 +557,9 @@ TRACE_EVENT(ext4_request_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal 
%llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->flags, __entry->len, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, @@ -533,7 +575,8 @@ TRACE_EVENT(ext4_allocate_blocks, TP_ARGS(ar, block), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned int, flags ) @@ -547,7 +590,8 @@ TRACE_EVENT(ext4_allocate_blocks, ), TP_fast_assign( - __entry->dev = ar->inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev); __entry->ino = ar->inode->i_ino; __entry->block = block; __entry->flags = ar->flags; @@ -560,9 +604,10 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->flags, __entry->len, __entry->block, + TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->block, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -578,7 +623,8 @@ TRACE_EVENT(ext4_free_blocks, TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, block ) @@ -587,7 +633,8 @@ TRACE_EVENT(ext4_free_blocks, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->block = block; @@ -595,8 +642,9 @@ TRACE_EVENT(ext4_free_blocks, __entry->flags = flags; ), - TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, __entry->flags) ); @@ -607,7 +655,8 @@ TRACE_EVENT(ext4_sync_file, TP_ARGS(file, datasync), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) @@ -616,14 +665,16 @@ TRACE_EVENT(ext4_sync_file, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - __entry->dev = dentry->d_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev); __entry->ino = dentry->d_inode->i_ino; __entry->datasync = datasync; __entry->parent = dentry->d_parent->d_inode->i_ino; ), - TP_printk("dev %s ino %ld parent %ld datasync %d ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %ld parent %ld datasync %d ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, (unsigned long) __entry->parent, 
__entry->datasync) ); @@ -633,18 +684,20 @@ TRACE_EVENT(ext4_sync_fs, TP_ARGS(sb, wait), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, wait ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->wait = wait; ), - TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev), - __entry->wait) + TP_printk("dev %d,%d wait %d", __entry->dev_major, + __entry->dev_minor, __entry->wait) ); TRACE_EVENT(ext4_alloc_da_blocks, @@ -653,21 +706,24 @@ TRACE_EVENT(ext4_alloc_da_blocks, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( unsigned int, data_blocks ) __field( unsigned int, meta_blocks ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->data_blocks, __entry->meta_blocks) ); @@ -677,7 +733,8 @@ TRACE_EVENT(ext4_mballoc_alloc, TP_ARGS(ac), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u16, found ) __field( __u16, groups ) @@ -700,7 +757,8 @@ TRACE_EVENT(ext4_mballoc_alloc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->found = ac->ac_found; __entry->flags = ac->ac_flags; @@ -722,10 +780,11 @@ TRACE_EVENT(ext4_mballoc_alloc, __entry->result_len = ac->ac_f_ex.fe_len; ), - TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " + TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " "tail %u broken %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->goal_group, __entry->goal_start, @@ -743,7 +802,8 @@ TRACE_EVENT(ext4_mballoc_prealloc, TP_ARGS(ac), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) @@ -756,7 +816,8 @@ TRACE_EVENT(ext4_mballoc_prealloc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; @@ -768,8 +829,9 @@ TRACE_EVENT(ext4_mballoc_prealloc, __entry->result_len = ac->ac_b_ex.fe_len; ), - TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", + __entry->dev_major, __entry->dev_minor, + (unsigned 
long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->result_group, __entry->result_start, @@ -786,7 +848,8 @@ DECLARE_EVENT_CLASS(ext4__mballoc, TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( int, result_start ) __field( __u32, result_group ) @@ -794,15 +857,17 @@ DECLARE_EVENT_CLASS(ext4__mballoc, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->ino = inode ? inode->i_ino : 0; __entry->result_start = start; __entry->result_group = group; __entry->result_len = len; ), - TP_printk("dev %s inode %lu extent %u/%d/%u ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d inode %lu extent %u/%d/%u ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, __entry->result_len) ); @@ -835,7 +900,8 @@ TRACE_EVENT(ext4_forget, TP_ARGS(inode, is_metadata, block), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( int, is_metadata ) @@ -843,16 +909,18 @@ TRACE_EVENT(ext4_forget, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->is_metadata = is_metadata; __entry->block = block; ), - TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, __entry->is_metadata, __entry->block) + TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + __entry->is_metadata, __entry->block) ); TRACE_EVENT(ext4_da_update_reserve_space, @@ -861,7 +929,8 @@ TRACE_EVENT(ext4_da_update_reserve_space, TP_ARGS(inode, used_blocks), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -872,7 +941,8 @@ TRACE_EVENT(ext4_da_update_reserve_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -882,9 +952,10 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + (unsigned long long) __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -895,7 +966,8 @@ TRACE_EVENT(ext4_da_reserve_space, TP_ARGS(inode, md_needed), TP_STRUCT__entry( - __field( dev_t, dev ) 
+ __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -905,7 +977,8 @@ TRACE_EVENT(ext4_da_reserve_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -914,8 +987,9 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, (unsigned long long) __entry->i_blocks, __entry->md_needed, __entry->reserved_data_blocks, __entry->reserved_meta_blocks) @@ -927,7 +1001,8 @@ TRACE_EVENT(ext4_da_release_space, TP_ARGS(inode, freed_blocks), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -938,7 +1013,8 @@ TRACE_EVENT(ext4_da_release_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -948,8 +1024,9 @@ TRACE_EVENT(ext4_da_release_space, __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, (unsigned long long) __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) @@ -961,18 +1038,20 @@ DECLARE_EVENT_CLASS(ext4__bitmap_load, TP_ARGS(sb, group), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u32, group ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->group = group; ), - TP_printk("dev %s group %u", - jbd2_dev_to_name(__entry->dev), __entry->group) + TP_printk("dev %d,%d group %u", + __entry->dev_major, __entry->dev_minor, __entry->group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index bf16545cc977..7447ea9305b5 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -17,17 +17,19 @@ TRACE_EVENT(jbd2_checkpoint, TP_ARGS(journal, result), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, result ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->result = result; ), - 
TP_printk("dev %s result %d", - jbd2_dev_to_name(__entry->dev), __entry->result) + TP_printk("dev %d,%d result %d", + __entry->dev_major, __entry->dev_minor, __entry->result) ); DECLARE_EVENT_CLASS(jbd2_commit, @@ -37,20 +39,22 @@ DECLARE_EVENT_CLASS(jbd2_commit, TP_ARGS(journal, commit_transaction), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( char, sync_commit ) __field( int, transaction ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_printk("dev %d,%d transaction %d sync %d", + __entry->dev_major, __entry->dev_minor, + __entry->transaction, __entry->sync_commit) ); DEFINE_EVENT(jbd2_commit, jbd2_start_commit, @@ -87,22 +91,24 @@ TRACE_EVENT(jbd2_end_commit, TP_ARGS(journal, commit_transaction), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( char, sync_commit ) __field( int, transaction ) __field( int, head ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %s transaction %d sync %d head %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit, __entry->head) + TP_printk("dev %d,%d transaction %d sync %d head %d", + __entry->dev_major, __entry->dev_minor, + __entry->transaction, __entry->sync_commit, __entry->head) ); TRACE_EVENT(jbd2_submit_inode_data, @@ -111,17 +117,20 @@ TRACE_EVENT(jbd2_submit_inode_data, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino) ); TRACE_EVENT(jbd2_run_stats, @@ -131,7 +140,8 @@ TRACE_EVENT(jbd2_run_stats, TP_ARGS(dev, tid, stats), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( unsigned long, tid ) __field( unsigned long, wait ) __field( unsigned long, running ) @@ -144,7 +154,8 @@ TRACE_EVENT(jbd2_run_stats, ), TP_fast_assign( - __entry->dev = dev; + __entry->dev_major = MAJOR(dev); + __entry->dev_minor = MINOR(dev); __entry->tid = tid; __entry->wait = stats->rs_wait; __entry->running = stats->rs_running; @@ -156,9 +167,9 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " + TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " "logging %u handle_count %u blocks %u blocks_logged %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + __entry->dev_major, __entry->dev_minor, __entry->tid, 
jiffies_to_msecs(__entry->wait), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), @@ -175,7 +186,8 @@ TRACE_EVENT(jbd2_checkpoint_stats, TP_ARGS(dev, tid, stats), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( unsigned long, tid ) __field( unsigned long, chp_time ) __field( __u32, forced_to_close ) @@ -184,7 +196,8 @@ TRACE_EVENT(jbd2_checkpoint_stats, ), TP_fast_assign( - __entry->dev = dev; + __entry->dev_major = MAJOR(dev); + __entry->dev_minor = MINOR(dev); __entry->tid = tid; __entry->chp_time = stats->cs_chp_time; __entry->forced_to_close= stats->cs_forced_to_close; @@ -192,9 +205,9 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " "written %u dropped %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + __entry->dev_major, __entry->dev_minor, __entry->tid, jiffies_to_msecs(__entry->chp_time), __entry->forced_to_close, __entry->written, __entry->dropped) ); @@ -207,7 +220,8 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, TP_ARGS(journal, first_tid, block_nr, freed), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( tid_t, tail_sequence ) __field( tid_t, first_tid ) __field(unsigned long, block_nr ) @@ -215,16 +229,18 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->tail_sequence = journal->j_tail_sequence; __entry->first_tid = first_tid; __entry->block_nr = block_nr; __entry->freed = freed; ), - TP_printk("dev %s from %u to %u offset %lu freed %lu", - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, - __entry->first_tid, __entry->block_nr, __entry->freed) + TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", + __entry->dev_major, __entry->dev_minor, + __entry->tail_sequence, __entry->first_tid, + __entry->block_nr, __entry->freed) ); #endif /* _TRACE_JBD2_H */ -- cgit v1.2.3 From 4600d7c493f354a3e338a35bcf8a3bfbe815776a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 28 Oct 2010 11:30:31 +0900 Subject: genirq: Fix up irq_node() for irq_data changes. Now that the node ID is tracked in the irq_data structure, update the irq_node() definition accordingly. This fixes up irq_node() usage under GENERIC_HARDIRQS_NO_DEPRECATED && SMP. Signed-off-by: Paul Mundt LKML-Reference: <20101028023031.GB10365@linux-sh.org> Signed-off-by: Thomas Gleixner --- include/linux/irqnr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 05aa8c23483f..3bc4dcab6e82 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -43,7 +43,7 @@ unsigned int irq_get_next_irq(unsigned int offset); else #ifdef CONFIG_SMP -#define irq_node(irq) (irq_to_desc(irq)->node) +#define irq_node(irq) (irq_get_irq_data(irq)->node) #else #define irq_node(irq) 0 #endif -- cgit v1.2.3 From 91bad2f8d3057482b9afb599f14421b007136960 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 1 Oct 2010 17:23:48 -0400 Subject: jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex register_kprobe() downs the 'text_mutex' and then calls jump_label_text_reserved(), which downs the 'jump_label_mutex'. However, the jump label code takes those mutexes in the reverse order. 
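The inverted ordering, schematically (a sketch of the two paths, not verbatim kernel code):

	/* Path A: register_kprobe()         Path B: jump label update
	 *
	 *   mutex_lock(&text_mutex);          mutex_lock(&jump_label_mutex);
	 *   jump_label_text_reserved()        ...
	 *     mutex_lock(&jump_label_mutex);    mutex_lock(&text_mutex);
	 *
	 * A holds text_mutex and waits on jump_label_mutex while B holds
	 * jump_label_mutex and waits on text_mutex: an AB-BA deadlock. */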
Fix by requiring the caller of jump_label_text_reserved() to do the jump label locking via the newly added: jump_label_lock(), jump_label_unlock(). Currently, kprobes is the only user of jump_label_text_reserved(). Reported-by: Ingo Molnar Acked-by: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: <759032c48d5e30c27f0bba003d09bffa8e9f28bb.1285965957.git.jbaron@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 5 +++++ kernel/jump_label.c | 33 +++++++++++++++++++++------------ kernel/kprobes.c | 6 ++++++ 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b67cb180e6e9..1947a1212678 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -18,6 +18,8 @@ struct module; extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; +extern void jump_label_lock(void); +extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); @@ -59,6 +61,9 @@ static inline int jump_label_text_reserved(void *start, void *end) return 0; } +static inline void jump_label_lock(void) {} +static inline void jump_label_unlock(void) {} + #endif #define COND_STMT(key, stmt) \ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index be9e105345eb..12cce78e9568 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -39,6 +39,16 @@ struct jump_label_module_entry { struct module *mod; }; +void jump_label_lock(void) +{ + mutex_lock(&jump_label_mutex); +} + +void jump_label_unlock(void) +{ + mutex_unlock(&jump_label_mutex); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) struct jump_label_module_entry *e_module; int count; - mutex_lock(&jump_label_mutex); + jump_label_lock(); entry = get_jump_label_entry((jump_label_t)key); if (entry) { count = entry->nr_entries; @@ -175,7 +185,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) } } } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); } static int addr_conflict(struct jump_entry *entry, void *start, void *end) @@ -232,6 +242,7 @@ out: * overlaps with any of the jump label patch addresses. Code * that wants to modify kernel text should first verify that * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex. 
* * returns 1 if there is an overlap, 0 otherwise */ @@ -242,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) struct jump_entry *iter_stop = __start___jump_table; int conflict = 0; - mutex_lock(&jump_label_mutex); iter = iter_start; while (iter < iter_stop) { if (addr_conflict(iter, start, end)) { @@ -257,7 +267,6 @@ int jump_label_text_reserved(void *start, void *end) conflict = module_conflict(start, end); #endif out: - mutex_unlock(&jump_label_mutex); return conflict; } @@ -268,7 +277,7 @@ static __init int init_jump_label(void) struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = build_jump_label_hashtable(__start___jump_table, __stop___jump_table); iter = iter_start; @@ -276,7 +285,7 @@ static __init int init_jump_label(void) arch_jump_label_text_poke_early(iter->code); iter++; } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); return ret; } early_initcall(init_jump_label); @@ -409,21 +418,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = add_jump_label_module(mod); if (ret) remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_GOING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_LIVE: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module_init(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; } return ret; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 99865c33a60d..9437e14f36bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1146,13 +1146,16 @@ int __kprobes register_kprobe(struct kprobe *p) return ret; preempt_disable(); + jump_label_lock(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { preempt_enable(); + jump_label_unlock(); return -EINVAL; } + jump_label_unlock(); /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ p->flags &= KPROBE_FLAG_DISABLED; @@ -1187,6 +1190,8 @@ int __kprobes register_kprobe(struct kprobe *p) INIT_LIST_HEAD(&p->list); mutex_lock(&kprobe_mutex); + jump_label_lock(); /* needed to call jump_label_text_reserved() */ + get_online_cpus(); /* For avoiding text_mutex deadlock. */ mutex_lock(&text_mutex); @@ -1214,6 +1219,7 @@ int __kprobes register_kprobe(struct kprobe *p) out: mutex_unlock(&text_mutex); put_online_cpus(); + jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) -- cgit v1.2.3 From 4f7ebe807242898ee08ed732d56982874442c304 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Wed, 28 Jul 2010 14:17:26 +0530 Subject: net/9p: This patch implements TLERROR/RLERROR on the 9P client. 
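With 9P2000.L the server answers a failed request with Rlerror, which carries a numeric errno rather than the ename string plus code of 9P2000.u. Decoding it reduces to the following (a sketch of the dotl branch added below; the error check on the read is an addition for clarity):

	int ecode;

	/* "d" pulls a single 4-byte numeric field out of the reply;
	 * no ename parsing or p9_errstr2errno() mapping is needed */
	err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
	if (!err)
		err = -ecode;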
Signed-off-by: Arun R Bharadwaj Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 4 ++++ net/9p/client.c | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a8de812ccbc8..a4a1b043a8c4 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -86,6 +86,8 @@ do { \ /** * enum p9_msg_t - 9P message types + * @P9_TLERROR: not used + * @P9_RLERROR: response for any failed request for 9P2000.L * @P9_TSTATFS: file system status request * @P9_RSTATFS: file system status response * @P9_TSYMLINK: make symlink request @@ -137,6 +139,8 @@ do { \ */ enum p9_msg_t { + P9_TLERROR = 6, + P9_RLERROR, P9_TSTATFS = 8, P9_RSTATFS, P9_TLOPEN = 12, diff --git a/net/9p/client.c b/net/9p/client.c index 83bf0541d66f..c155cc45eff9 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR) { + if (type == P9_RERROR || type == P9_RLERROR) { int ecode; - char *ename; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", - err); - return err; - } + if (!p9_is_proto_dotl(c)) { + char *ename; - if (p9_is_proto_dotu(c) || - p9_is_proto_dotl(c)) - err = -ecode; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (!err || !IS_ERR_VALUE(err)) - err = p9_errstr2errno(ename, strlen(ename)); + if (p9_is_proto_dotu(c)) + err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + + kfree(ename); + } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - kfree(ename); } else err = 0; return err; + +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + + return err; } /** -- cgit v1.2.3 From 920e65dc6911da28a58e17f4b683302636fc6d8e Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 22 Sep 2010 17:19:19 -0700 Subject: [9p] Introduce client side TFSYNC/RFSYNC for dotl. SYNOPSIS size[4] Tfsync tag[2] fid[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of file identified by fid to the disk device (or other permanent storage device) where that file resides. 
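On the client side the transaction is a single RPC keyed by the fid; a sketch of the round trip, following the p9_client_fsync() added below:

	struct p9_req_t *req;

	/* Tfsync carries only the fid ("d" = one 4-byte field) and
	 * Rfsync has no payload, so a non-error reply means success */
	req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid);
	if (IS_ERR(req))
		return PTR_ERR(req);
	p9_free_req(clnt, req);
	return 0;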
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 18 ++++++++++++++++-- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 25 +++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 28db7fb1d96e..fdf303207c72 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -290,6 +290,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync) return retval; } +static int v9fs_file_fsync_dotl(struct file *filp, int datasync) +{ + struct p9_fid *fid; + int retval; + + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", + filp, datasync); + + fid = filp->private_data; + + retval = p9_client_fsync(fid); + return retval; +} + static const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -311,7 +325,7 @@ static const struct file_operations v9fs_cached_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, - .fsync = v9fs_file_fsync, + .fsync = v9fs_file_fsync_dotl, }; const struct file_operations v9fs_file_operations = { @@ -333,5 +347,5 @@ const struct file_operations v9fs_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, - .fsync = v9fs_file_fsync, + .fsync = v9fs_file_fsync_dotl, }; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a4a1b043a8c4..55e96057f47f 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -163,6 +163,8 @@ enum p9_msg_t { P9_RXATTRCREATE, P9_TREADDIR = 40, P9_RREADDIR, + P9_TFSYNC = 50, + P9_RFSYNC, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 7f63d5ab7b44..8744e3ad4a07 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,6 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); diff --git a/net/9p/client.c b/net/9p/client.c index e50ec802937a..30c4a1b224fb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1162,6 +1162,31 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) } EXPORT_SYMBOL(p9_client_link); +int p9_client_fsync(struct p9_fid *fid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d\n", fid->fid); + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); + + p9_free_req(clnt, req); + +error: + return err; +} +EXPORT_SYMBOL(p9_client_fsync); + int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3 From a099027c779068b834f335cfdc3f2bf10f531dd9 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 11:34:24 +0530 Subject: 9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. 
The reply contains the status of the lock request. flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request; if a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when the client is trying to reclaim a lock after a server restart (due to a crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period; it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 2 + fs/9p/vfs_file.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++- include/net/9p/9p.h | 28 +++++++++ include/net/9p/client.h | 1 + net/9p/client.c | 33 ++++++++++ 5 files changed, 222 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 09861295eef9..d26db1932f7a 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -65,3 +65,5 @@ int v9fs_uflags2omode(int uflags, int extended); ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); + +#define P9_LOCK_TIMEOUT (30*HZ) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fdf303207c72..6f77abd23184 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,159 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) return res; } +static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct p9_flock flock; + struct p9_fid *fid; + uint8_t status; + int res = 0; + unsigned char fl_type; + + fid = filp->private_data; + BUG_ON(fid == NULL); + + if ((fl->fl_flags & FL_POSIX) != FL_POSIX) + BUG(); + + res = posix_lock_file_wait(filp, fl); + if (res < 0) + goto out; + + /* convert posix lock to p9 tlock args */ + memset(&flock, 0, sizeof(flock)); + flock.type = fl->fl_type; + flock.start = fl->fl_start; + if (fl->fl_end == OFFSET_MAX) + flock.length = 0; + else + flock.length = fl->fl_end - fl->fl_start + 1; + flock.proc_id = fl->fl_pid; + flock.client_id = utsname()->nodename; + if (IS_SETLKW(cmd)) + flock.flags = P9_LOCK_FLAGS_BLOCK; + + /* + * if its a blocked request and we get P9_LOCK_BLOCKED as the status + * for lock request, keep on trying + */ + for (;;) { + res = p9_client_lock_dotl(fid, &flock, &status); + if (res < 0) + break; + + if (status != P9_LOCK_BLOCKED) + break; + if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) + break; + schedule_timeout_interruptible(P9_LOCK_TIMEOUT); + } + + /* map 9p status to VFS status */ + switch (status) { + case P9_LOCK_SUCCESS: + res = 0; + break; + case P9_LOCK_BLOCKED: + res = -EAGAIN; + break; + case P9_LOCK_ERROR: + case P9_LOCK_GRACE: + res = -ENOLCK; +
break; + default: + BUG(); + } + + /* + * in case the server returned an error for the lock request, revert + * it locally + */ + if (res < 0 && fl->fl_type != F_UNLCK) { + fl_type = fl->fl_type; + fl->fl_type = F_UNLCK; + res = posix_lock_file_wait(filp, fl); + fl->fl_type = fl_type; + } +out: + return res; +} + +/** + * v9fs_file_lock_dotl - lock a file (or directory) + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure + * + */ + +static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + int ret = -ENOLCK; + + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, + cmd, fl, filp->f_path.dentry->d_name.name); + + /* No mandatory locks */ + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) + goto out_err; + + if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { + filemap_write_and_wait(inode->i_mapping); + invalidate_mapping_pages(&inode->i_data, 0, -1); + } + + if (IS_SETLK(cmd) || IS_SETLKW(cmd)) + ret = v9fs_file_do_lock(filp, cmd, fl); + else + ret = -EINVAL; +out_err: + return ret; +} + +/** + * v9fs_file_flock_dotl - lock a file + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure + * + */ + +static int v9fs_file_flock_dotl(struct file *filp, int cmd, + struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + int ret = -ENOLCK; + + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, + cmd, fl, filp->f_path.dentry->d_name.name); + + /* No mandatory locks */ + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) + goto out_err; + + if (!(fl->fl_flags & FL_FLOCK)) + goto out_err; + + if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { + filemap_write_and_wait(inode->i_mapping); + invalidate_mapping_pages(&inode->i_data, 0, -1); + } + /* Convert flock to posix lock */ + fl->fl_owner = (fl_owner_t)filp; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_flags |= FL_POSIX; + fl->fl_flags ^= FL_FLOCK; + + if (IS_SETLK(cmd) || IS_SETLKW(cmd)) + ret = v9fs_file_do_lock(filp, cmd, fl); + else + ret = -EINVAL; +out_err: + return ret; +} + /** * v9fs_file_readn - read from a file * @filp: file pointer to read @@ -323,7 +477,8 @@ static const struct file_operations v9fs_cached_file_operations_dotl = { .write = v9fs_file_write, .open = v9fs_file_open, .release = v9fs_dir_release, - .lock = v9fs_file_lock, + .lock = v9fs_file_lock_dotl, + .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, .fsync = v9fs_file_fsync_dotl, }; @@ -345,7 +500,8 @@ const struct file_operations v9fs_file_operations_dotl = { .write = v9fs_file_write, .open = v9fs_file_open, .release = v9fs_dir_release, - .lock = v9fs_file_lock, + .lock = v9fs_file_lock_dotl, + .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, .fsync = v9fs_file_fsync_dotl, };
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 55e96057f47f..1859a2560cc5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -165,6 +165,8 @@ enum p9_msg_t { P9_RREADDIR, P9_TFSYNC = 50, P9_RFSYNC, + P9_TLOCK = 52, + P9_RLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -464,6 +466,32 @@ struct p9_iattr_dotl { u64 mtime_nsec; }; +#define P9_LOCK_SUCCESS 0 +#define P9_LOCK_BLOCKED 1 +#define P9_LOCK_ERROR 2 +#define P9_LOCK_GRACE 3 + +#define P9_LOCK_FLAGS_BLOCK 1 +#define P9_LOCK_FLAGS_RECLAIM 2 + +/* struct p9_flock: POSIX lock structure * @type - type of lock * @flags - lock flags * @start - starting
offset of the lock + @length - number of bytes + @proc_id - process id which wants to take lock + @client_id - client id + */ + +struct p9_flock { + u8 type; + u32 flags; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 8744e3ad4a07..d7dcb142e3bb 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -249,6 +249,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, dev_t rdev, gid_t gid, struct p9_qid *); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
diff --git a/net/9p/client.c b/net/9p/client.c index 30c4a1b224fb..fbd2b195801c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1803,3 +1803,36 @@ error: } EXPORT_SYMBOL(p9_client_mkdir_dotl); + +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + "start %lld length %lld proc_id %d client_id %s\n", + fid->fid, flock->type, flock->flags, flock->start, + flock->length, flock->proc_id, flock->client_id); + + req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type, + flock->flags, flock->start, flock->length, + flock->proc_id, flock->client_id); + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_lock_dotl); -- cgit v1.2.3
From 1d769cd192fc8c4097b1e2cd41fdee6ba3d1b2af Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 12:22:13 +0530 Subject: 9p: Implement TGETLOCK Synopsis size[4] TGetlock tag[2] fid[4] getlock[n] size[4] RGetlock tag[2] getlock[n] Description TGetlock is used to test for the existence of byte range posix locks on a file identified by given fid. The reply contains a getlock structure. If the lock could be placed, it returns F_UNLCK in the type field of the getlock structure. Otherwise it returns the details of the conflicting locks in the getlock structure. getlock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK start[8] - Starting offset for lock length[8] - Number of bytes to check for the lock If length is 0, check for lock in all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take the lock, or that owns the conflicting lock in the reply client[4] - Client id of the system that owns the process which has the conflicting lock Signed-off-by: M.
Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/9p/9p.h | 18 ++++++++++++++++++ include/net/9p/client.h | 1 + net/9p/client.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6f77abd23184..3a4352f23a98 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -211,6 +211,51 @@ out: return res; } +static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) +{ + struct p9_getlock glock; + struct p9_fid *fid; + int res = 0; + + fid = filp->private_data; + BUG_ON(fid == NULL); + + posix_test_lock(filp, fl); + /* + * if we have a conflicting lock locally, no need to validate + * with server + */ + if (fl->fl_type != F_UNLCK) + return res; + + /* convert posix lock to p9 tgetlock args */ + memset(&glock, 0, sizeof(glock)); + glock.type = fl->fl_type; + glock.start = fl->fl_start; + if (fl->fl_end == OFFSET_MAX) + glock.length = 0; + else + glock.length = fl->fl_end - fl->fl_start + 1; + glock.proc_id = fl->fl_pid; + glock.client_id = utsname()->nodename; + + res = p9_client_getlock_dotl(fid, &glock); + if (res < 0) + return res; + if (glock.type != F_UNLCK) { + fl->fl_type = glock.type; + fl->fl_start = glock.start; + if (glock.length == 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = glock.start + glock.length - 1; + fl->fl_pid = glock.proc_id; + } else + fl->fl_type = F_UNLCK; + + return res; +} + /** * v9fs_file_lock_dotl - lock a file (or directory) * @filp: file to be locked @@ -238,6 +283,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) if (IS_SETLK(cmd) || IS_SETLKW(cmd)) ret = v9fs_file_do_lock(filp, cmd, fl); + else if (IS_GETLK(cmd)) + ret = v9fs_file_getlock(filp, fl); else ret = -EINVAL; out_err: diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 1859a2560cc5..6367a71d84fc 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -167,6 +167,8 @@ enum p9_msg_t { P9_RFSYNC, P9_TLOCK = 52, P9_RLOCK, + P9_TGETLOCK = 54, + P9_RGETLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -492,6 +494,22 @@ struct p9_flock { char *client_id; }; +/* struct p9_getlock: getlock structure + * @type - type of lock + * @start - starting offset of the lock + * @length - number of bytes + * @proc_id - process id which wants to take lock + * @client_id - client id + */ + +struct p9_getlock { + u8 type; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d7dcb142e3bb..127c9f2a9cb8 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -250,6 +250,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); +int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); diff --git a/net/9p/client.c b/net/9p/client.c index fbd2b195801c..fc1b0579016a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1836,3 +1836,37 @@ error: } EXPORT_SYMBOL(p9_client_lock_dotl); + +int p9_client_getlock_dotl(struct p9_fid *fid, 
struct p9_getlock *glock) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type, + &glock->start, &glock->length, &glock->proc_id, + &glock->client_id); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + "proc_id %d client_id %s\n", glock->type, glock->start, + glock->length, glock->proc_id, glock->client_id); +error: + p9_free_req(clnt, req); + return err; +} +EXPORT_SYMBOL(p9_client_getlock_dotl); -- cgit v1.2.3
From 368c09d2a303c39e9f37193b23e945e6754cf0a7 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 14:17:24 +0530 Subject: 9p: Use V9FS_MAGIC in statfs Use V9FS_MAGIC as the file system type while filling the kernel statfs structure instead of using the host file system's magic number. Also move the definition of V9FS_MAGIC from v9fs.h to the standard magic.h file. Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.h | 2 -- fs/9p/vfs_super.c | 3 ++- include/linux/magic.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include')
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 8bb7792afe2e..cb6396855e2d 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -117,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses); void v9fs_session_cancel(struct v9fs_session_info *v9ses); void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); -#define V9FS_MAGIC 0x01021997 - /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody"
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 174643f4f901..48d4215c60a8 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -256,7 +257,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) if (v9fs_proto_dotl(v9ses)) { res = p9_client_statfs(fid, &rs); if (res == 0) { - buf->f_type = rs.type; + buf->f_type = V9FS_MAGIC; buf->f_bsize = rs.bsize; buf->f_blocks = rs.blocks; buf->f_bfree = rs.bfree;
diff --git a/include/linux/magic.h b/include/linux/magic.h index eb9800f05782..ff690d05f129 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -57,5 +57,6 @@ #define DEVPTS_SUPER_MAGIC 0x1cd1 #define SOCKFS_MAGIC 0x534F434B +#define V9FS_MAGIC 0x01021997 #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3
From 329176cc2c50e63c580ddaabb385876db5af1360 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Tue, 28 Sep 2010 19:59:25 +0530 Subject: 9p: Implement TREADLINK operation for 9p2000.L Synopsis size[4] TReadlink tag[2] fid[4] size[4] RReadlink tag[2] target[s] Description Readlink is used to return the contents of the symbolic link referred to by fid. The contents of the symbolic link are returned as a response. target[s] - Contents of the symbolic link referred to by fid. Signed-off-by: M.
Mohan Kumar Reviewed-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++--- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 26 +++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 68f02973c338..88419073c654 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1527,7 +1527,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (IS_ERR(fid)) return PTR_ERR(fid); - if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) + if (!v9fs_proto_dotu(v9ses)) return -EBADF; st = p9_client_stat(fid); @@ -1995,6 +1995,60 @@ error: return err; } +static int +v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen) +{ + int retval; + struct p9_fid *fid; + char *target = NULL; + + P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + retval = -EPERM; + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + retval = p9_client_readlink(fid, &target); + if (retval < 0) + return retval; + + strncpy(buffer, target, buflen); + P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer); + + retval = strnlen(buffer, buflen); + return retval; +} + +/** + * v9fs_vfs_follow_link_dotl - follow a symlink path + * @dentry: dentry for symlink + * @nd: nameidata + * + */ + +static void * +v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) +{ + int len = 0; + char *link = __getname(); + + P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); + + if (!link) + link = ERR_PTR(-ENOMEM); + else { + len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX); + if (len < 0) { + __putname(link); + link = ERR_PTR(len); + } else + link[min(len, PATH_MAX-1)] = 0; + } + nd_set_link(nd, link); + + return NULL; +} + static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, @@ -2064,8 +2118,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = { }; static const struct inode_operations v9fs_symlink_inode_operations_dotl = { - .readlink = generic_readlink, - .follow_link = v9fs_vfs_follow_link, + .readlink = v9fs_vfs_readlink_dotl, + .follow_link = v9fs_vfs_follow_link_dotl, .put_link = v9fs_vfs_put_link, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 6367a71d84fc..071fd7a8d781 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -153,6 +153,8 @@ enum p9_msg_t { P9_RMKNOD, P9_TRENAME = 20, P9_RRENAME, + P9_TREADLINK = 22, + P9_RREADLINK, P9_TGETATTR = 24, P9_RGETATTR, P9_TSETATTR = 26, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 127c9f2a9cb8..335b088e7672 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -262,5 +262,6 @@ int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); +int p9_client_readlink(struct p9_fid *fid, char **target); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index fc1b0579016a..2bc99e9031e7 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1870,3 +1870,29 @@ error: return err; } EXPORT_SYMBOL(p9_client_getlock_dotl); + +int p9_client_readlink(struct 
p9_fid *fid, char **target) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + + req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); +error: + p9_free_req(clnt, req); + return err; +} +EXPORT_SYMBOL(p9_client_readlink); -- cgit v1.2.3
From b165d60145b717261a0234f989c442c2b68b6ec0 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 22 Oct 2010 10:13:12 -0700 Subject: 9p: Add datasync to client side TFSYNC/RFSYNC for dotl SYNOPSIS size[4] Tfsync tag[2] fid[4] datasync[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of the file identified by fid to the disk device (or other permanent storage device) where that file resides. If the datasync flag is specified, data will be flushed, but modified metadata will not be flushed unless it is needed in order to allow a subsequent data retrieval to be handled correctly. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 1 + fs/9p/vfs_dir.c | 1 + fs/9p/vfs_file.c | 4 ++-- include/net/9p/client.h | 2 +- net/9p/client.c | 7 ++++--- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include')
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index d26db1932f7a..bab0eac873f4 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -65,5 +65,6 @@ int v9fs_uflags2omode(int uflags, int extended); ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); +int v9fs_file_fsync_dotl(struct file *filp, int datasync); #define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 170f5bb8ebe0..b84ebe8cefed 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -315,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = { .readdir = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, + .fsync = v9fs_file_fsync_dotl, };
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3a4352f23a98..240c30674396 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -491,7 +491,7 @@ static int v9fs_file_fsync(struct file *filp, int datasync) return retval; } -static int v9fs_file_fsync_dotl(struct file *filp, int datasync) +int v9fs_file_fsync_dotl(struct file *filp, int datasync) { struct p9_fid *fid; int retval; @@ -501,7 +501,7 @@ static int v9fs_file_fsync_dotl(struct file *filp, int datasync) fid = filp->private_data; - retval = p9_client_fsync(fid); + retval = p9_client_fsync(fid, datasync); return retval; }
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 335b088e7672..83ba6a4d58a3 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,7 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); -int p9_client_fsync(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64
offset, u32 count);
diff --git a/net/9p/client.c b/net/9p/client.c index e3cfdff37327..8df80fb86f23 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1165,17 +1165,18 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) } EXPORT_SYMBOL(p9_client_link); -int p9_client_fsync(struct p9_fid *fid) +int p9_client_fsync(struct p9_fid *fid, int datasync) { int err; struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + fid->fid, datasync); err = 0; clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid); + req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.3
From b31d42a5af1818bdf31a5f023abe4d8b212542f2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Oct 2010 16:39:24 +0200 Subject: Fix compile breakage with !CONFIG_BLOCK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's git tree fails to build on !CONFIG_BLOCK, due to upstream commit 367a51a33902 ("fs: Add FITRIM ioctl"): include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ The commit adds uint64_t type usage to fs.h, but linux/types.h is not included explicitly - it's only included implicitly via linux/blk_types.h, and there only if CONFIG_BLOCK is enabled. Add the explicit #include to fix this. Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include')
diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ed7ace74b7c..1c73b50e81ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -9,6 +9,7 @@ #include #include #include +#include <linux/types.h> /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change -- cgit v1.2.3
From e732ff707743e5ceba6ae2bfc7e799a0bac30ffa Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Tue, 26 Oct 2010 21:01:47 +0800 Subject: mmu_notifier.h: fix comment spelling Signed-off-by: Figo.zhang Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 4e02ee2b071e..43dcfbdc39de 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -227,7 +227,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) /* * These two macros will sometime replace ptep_clear_flush. - * ptep_clear_flush is impleemnted as macro itself, so this also is + * ptep_clear_flush is implemented as macro itself, so this also is * implemented as a macro until ptep_clear_flush will converted to an * inline function, to diminish the risk of compilation failure. The * invalidate_page method over time can be moved outside the PT lock -- cgit v1.2.3
From dc841e30eaea9f9f83c9ab1ee0b3ef9e5c95ce8a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 27 Oct 2010 19:16:26 +0000 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeueing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).
The main purpose is to take CCID-2 out of its polling mode (when it is network-limited, it tries every millisecond to send, without interruption). The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the tasklet function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 +- net/dccp/output.c | 118 +++++++++++++++++++++++++++++++-------------------- net/dccp/timer.c | 25 ++++++----- 3 files changed, 89 insertions(+), 58 deletions(-) (limited to 'include')
diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7187bd8a75f6..749f01ccd26e 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,7 +462,8 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_xmit_timer - timer for when CCID is not ready to send + * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets + * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -502,6 +503,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; };
diff --git a/net/dccp/output.c b/net/dccp/output.c index a988fe9ffcba..11418a9a389d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -254,63 +254,89 @@ do_interrupted: goto out; } +/** + * dccp_xmit_packet - Send data packet under control of CCID + * Transmits next-queued payload and informs CCID to account for the packet. + */ +static void dccp_xmit_packet(struct sock *sk) +{ + int err, len; + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); + + if (unlikely(skb == NULL)) + return; + len = skb->len; + + if (sk->sk_state == DCCP_PARTOPEN) { + const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; + /* + * See 8.1.5 - Handshake Completion. + * + * For robustness we resend Confirm options until the client has + * entered OPEN. During the initial feature negotiation, the MPS + * is smaller than usual, reduced by the Change/Confirm options.
+ */ + if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { + DCCP_WARN("Payload too large (%d) for featneg.\n", len); + dccp_send_ack(sk); + dccp_feat_list_purge(&dp->dccps_featneg); + } + + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; + } + + err = dccp_transmit_skb(sk, skb); + if (err) + dccp_pr_debug("transmit_skb() returned err=%d\n", err); + /* + * Register this one as sent even if an error occurred. To the remote + * end a local packet drop is indistinguishable from network loss, i.e. + * any local drop will eventually be reported via receiver feedback. + */ + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); +} + void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); + int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - if (err > 0) { + switch (ccid_packet_dequeue_eval(rc)) { + case CCID_PACKET_WILL_DEQUEUE_LATER: + return; + case CCID_PACKET_DELAY: if (!block) { sk_reset_timer(sk, &dp->dccps_xmit_timer, - msecs_to_jiffies(err)+jiffies); + msecs_to_jiffies(rc)+jiffies); + return; + } + rc = dccp_wait_for_ccid(sk, skb, rc); + if (rc && rc != -EINTR) { + DCCP_BUG("err=%d after dccp_wait_for_ccid", rc); + skb_dequeue(&sk->sk_write_queue); + kfree_skb(skb); break; - } else - err = dccp_wait_for_ccid(sk, skb, err); - if (err && err != -EINTR) - DCCP_BUG("err=%d after dccp_wait_for_ccid", err); - } - - skb_dequeue(&sk->sk_write_queue); - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; - - if (sk->sk_state == DCCP_PARTOPEN) { - const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; - /* - * See 8.1.5 - Handshake Completion. - * - * For robustness we resend Confirm options until the client has - * entered OPEN. During the initial feature negotiation, the MPS - * is smaller than usual, reduced by the Change/Confirm options. 
- */ - if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { - DCCP_WARN("Payload too large (%d) for featneg.\n", len); - dccp_send_ack(sk); - dccp_feat_list_purge(&dp->dccps_featneg); - } - - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - inet_csk(sk)->icsk_rto, - DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); - if (err) - DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", - err); - } else { - dccp_pr_debug("packet discarded due to err=%d\n", err); + } + /* fall through */ + case CCID_PACKET_SEND_AT_ONCE: + dccp_xmit_packet(sk); + break; + case CCID_PACKET_ERR: + skb_dequeue(&sk->sk_write_queue); kfree_skb(skb); + dccp_pr_debug("packet discarded due to err=%d\n", rc); } } }
diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1a9aa05d4dc4..916f9d1dab36 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -237,32 +237,35 @@ out: sock_put(sk); } -/* Transmit-delay timer: used by the CCIDs to delay actual send time */ -static void dccp_write_xmit_timer(unsigned long data) +/** + * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface + * See the comments above %ccid_dequeueing_decision for supported modes. + */ +static void dccp_write_xmitlet(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); else dccp_write_xmit(sk, 0); bh_unlock_sock(sk); - sock_put(sk); } -static void dccp_init_write_xmit_timer(struct sock *sk) +static void dccp_write_xmit_timer(unsigned long data) { - struct dccp_sock *dp = dccp_sk(sk); - - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + dccp_write_xmitlet(data); + sock_put((struct sock *)data); } void dccp_init_xmit_timers(struct sock *sk) { - dccp_init_write_xmit_timer(sk); + struct dccp_sock *dp = dccp_sk(sk); + + tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); + setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, + (unsigned long)sk); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } -- cgit v1.2.3
From 4aa2c466a7733af093a526e9d1cdd0b3b90d47e9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 28 Oct 2010 02:00:43 +0000 Subject: fib: Fix fib zone and its hash leak on namespace stop When we stop a namespace we flush the table and free it, but the added fn_zone-s (and their hashes, if grown) are leaked and need to be freed. The trie releases all its stuff in its flushing code. Shame on us - this bug exists since the very first make-fib-per-net patches in 2.6.27 :( Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- include/net/ip_fib.h | 2 ++ net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_hash.c | 18 ++++++++++++++++++ net/ipv4/fib_trie.c | 5 +++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ba3666d31766..07bdb5e9e8ac 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -158,6 +158,8 @@ extern int fib_table_flush(struct fib_table *table); extern void fib_table_select_default(struct fib_table *table, const struct flowi *flp, struct fib_result *res); +extern void fib_free_table(struct fib_table *tb); + #ifndef CONFIG_IP_MULTIPLE_TABLES
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 36e27c2107de..eb6f69a8f27a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); fib_table_flush(tb); - kfree(tb); + fib_free_table(tb); } } kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b232375a0b75..b3acb0417b21 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -716,6 +716,24 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash *) tb->tb_data; + struct fn_zone *fz, *next; + + next = table->fn_zone_list; + while (next != NULL) { + fz = next; + next = fz->fz_next; + + if (fz->fz_hash != fz->fz_embedded_hash) + fz_hash_free(fz->fz_hash, fz->fz_divisor); + + kfree(fz); + } + + kfree(tb); +} static inline int fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b14450895102..200eb538fbb3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + kfree(tb); +} + void fib_table_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) -- cgit v1.2.3
From 8acfe468b0384e834a303f08ebc4953d72fb690a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Oct 2010 11:41:55 -0700 Subject: net: Limit socket I/O iovec total length to INT_MAX. This helps protect us from overflow issues down in the individual protocol sendmsg/recvmsg handlers. Once we hit INT_MAX we truncate out the rest of the iovec by setting the iov_len members to zero. This works because: 1) For SOCK_STREAM and SOCK_SEQPACKET sockets, partial writes are allowed and the application will just continue with another write to send the rest of the data. 2) For datagram oriented sockets, where there must be a one-to-one correspondence between write() calls and packets on the wire, INT_MAX is going to be far larger than the packet size limit the protocol is going to check for and signal with -EMSGSIZE. Based upon a patch by Linus Torvalds. Signed-off-by: David S.
Miller --- include/linux/socket.h | 2 +- net/compat.c | 10 ++++++---- net/core/iovec.c | 20 +++++++++----------- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5146b50202ce..86b652fabf6e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); diff --git a/net/compat.c b/net/compat.c index 63d260e81472..3649d5895361 100644 --- a/net/compat.c +++ b/net/compat.c @@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov, compat_size_t len; if (get_user(len, &uiov32->iov_len) || - get_user(buf, &uiov32->iov_base)) { - tot_len = -EFAULT; - break; - } + get_user(buf, &uiov32->iov_base)) + return -EFAULT; + + if (len > INT_MAX - tot_len) + len = INT_MAX - tot_len; + tot_len += len; kiov->iov_base = compat_ptr(buf); kiov->iov_len = (__kernel_size_t) len; diff --git a/net/core/iovec.c b/net/core/iovec.c index 72aceb1fe4fa..c40f27e7d208 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,10 +35,9 @@ * in any case. */ -long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, ct; - long err; + int size, ct, err; if (m->msg_namelen) { if (mode == VERIFY_READ) { @@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, err = 0; for (ct = 0; ct < m->msg_iovlen; ct++) { - err += iov[ct].iov_len; - /* - * Goal is not to verify user data, but to prevent returning - * negative value, which is interpreted as errno. - * Overflow is still possible, but it is harmless. - */ - if (err < 0) - return -EMSGSIZE; + size_t len = iov[ct].iov_len; + + if (len > INT_MAX - err) { + len = INT_MAX - err; + iov[ct].iov_len = len; + } + err += len; } return err; -- cgit v1.2.3 From 9343919c1495b085a4a1cf4cbada8d7888daf099 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: allow fanotify to be built We disabled the ability to build fanotify in commit 7c5347733dcc4ba0ba. This reverts that commit and allows people to build fanotify. 
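As an illustration of the interface this revert re-enables (not part of the patch; it assumes a libc exposing the fanotify_init()/fanotify_mark() wrappers, otherwise syscall(2) stubs are needed, and the watched path and event mask below are arbitrary example choices):

/* Minimal fanotify listener -- illustrative sketch only. */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/fanotify.h>

int main(void)
{
	char buf[4096];
	ssize_t len;
	/* Create a notification group; this requires CAP_SYS_ADMIN. */
	int fan_fd = fanotify_init(FAN_CLOEXEC, O_RDONLY | O_LARGEFILE);

	if (fan_fd < 0)
		return 1;
	/* Ask for open and close-after-write events below /tmp. */
	if (fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_OPEN | FAN_CLOSE_WRITE,
			  AT_FDCWD, "/tmp") < 0)
		return 1;
	while ((len = read(fan_fd, buf, sizeof(buf))) > 0) {
		struct fanotify_event_metadata *md = (void *)buf;

		while (FAN_EVENT_OK(md, len)) {
			printf("mask 0x%llx pid %d fd %d\n",
			       (unsigned long long)md->mask, md->pid, md->fd);
			if (md->fd >= 0)
				close(md->fd); /* each event carries an open fd */
			md = FAN_EVENT_NEXT(md, len);
		}
	}
	return 0;
}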
Signed-off-by: Eric Paris --- fs/notify/Kconfig | 2 +- include/linux/Kbuild | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index b388443c3a09..22c629eedd82 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -3,4 +3,4 @@ config FSNOTIFY source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" -#source "fs/notify/fanotify/Kconfig" +source "fs/notify/fanotify/Kconfig" diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 90e3ed3a3144..97319a8fc1e0 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,6 +118,7 @@ header-y += eventpoll.h header-y += ext2_fs.h header-y += fadvise.h header-y += falloc.h +header-y += fanotify.h header-y += fb.h header-y += fcntl.h header-y += fd.h -- cgit v1.2.3 From 6ad2d4e3e97ee4bfde0b45e8dfe37911330fc4aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: implement ordering between notifiers fanotify needs to be able to specify that some groups get events before others. They use this idea to make sure that a hierarchical storage manager gets access to files before programs which actually use them. This is purely infrastructure. Everything will have a priority of 0, but the infrastructure will exist for it to be non-zero. Signed-off-by: Eric Paris --- fs/notify/inode_mark.c | 9 +++++++-- fs/notify/vfsmount_mark.c | 6 +++++- include/linux/fsnotify_backend.h | 8 ++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 21ed10660b80..4c29fcf557d1 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, * Attach an initialized mark to a given inode. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group and for which inodes. These - * marks are ordered according to the group's location in memory. + * marks are ordered according to priority, highest number first, and then by + * the group's location in memory. 
*/ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, @@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 56772b578fbd..85eebff6d0d7 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e40190d16878..825329534162 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -129,6 +129,14 @@ struct fsnotify_group { wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ + /* + * Valid fsnotify group priorities. Events are sent in order from highest + * priority to lowest priority. We default to the lowest priority. + */ + #define FS_PRIO_0 0 /* normal notifiers, no permissions */ + #define FS_PRIO_1 1 /* fanotify content based access control */ + #define FS_PRIO_2 2 /* fanotify pre-content access */ + unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ spinlock_t mark_lock; /* protect marks_list */ -- cgit v1.2.3
From 4231a23530a30e86eb32fbe869bbef1b3e54d5aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: implement fanotify listener ordering fanotify listeners need to be able to specify what types of operations they are going to perform so they can be ordered appropriately among other listeners doing other types of operations. They need this to be able to make sure that things like hierarchical storage managers will get access to inodes before processes which need the data. This patch defines 3 possible uses which groups must indicate in the fanotify_init() flags. FAN_CLASS_PRE_CONTENT FAN_CLASS_CONTENT FAN_CLASS_NOTIF Groups will receive notification in that order. The order between 2 groups in the same class is nondeterministic. FAN_CLASS_PRE_CONTENT is intended to be used by listeners which need access to the inode before they are certain that the inode contains its final data. A hierarchical storage manager should choose to use this class. FAN_CLASS_CONTENT is intended to be used by listeners which need access to the inode after it contains its intended contents. This would be the appropriate level for an AV solution or document control system. FAN_CLASS_NOTIF is intended for normal async notification about access, much the same as inotify and dnotify. Synchronous permission events are not permitted at this class.
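To make the class semantics concrete, here is a hedged sketch (not from this patch; it presumes CONFIG_FANOTIFY_ACCESS_PERMISSIONS and the fanotify_response interface from earlier in this series, and the watched path is an arbitrary example) of a FAN_CLASS_CONTENT listener answering open-permission events:

/* Sketch of a content-class listener answering FAN_OPEN_PERM events.
 * Illustrative only; error handling elided. */
#include <unistd.h>
#include <fcntl.h>
#include <sys/fanotify.h>

static void handle_events(int fan_fd)
{
	char buf[4096];
	ssize_t len = read(fan_fd, buf, sizeof(buf));
	struct fanotify_event_metadata *md = (void *)buf;

	while (FAN_EVENT_OK(md, len)) {
		if (md->mask & FAN_OPEN_PERM) {
			struct fanotify_response resp = {
				.fd = md->fd,
				.response = FAN_ALLOW, /* or FAN_DENY after scanning */
			};
			write(fan_fd, &resp, sizeof(resp));
		}
		if (md->fd >= 0)
			close(md->fd);
		md = FAN_EVENT_NEXT(md, len);
	}
}

int main(void)
{
	/* FAN_CLASS_CONTENT orders this group after pre-content listeners
	 * but before plain FAN_CLASS_NOTIF notification groups. */
	int fan_fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT, O_RDONLY);

	if (fan_fd < 0)
		return 1;
	if (fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_OPEN_PERM,
			  AT_FDCWD, "/tmp") < 0)
		return 1;
	for (;;)
		handle_events(fan_fd);
}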
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++++++++++- include/linux/fanotify.h | 11 ++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include')
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bbcb98e7fcc6..1c09e6321c5e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -664,6 +664,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); #endif + switch (flags & FAN_ALL_CLASS_BITS) { + case FAN_CLASS_NOTIF: + group->priority = FS_PRIO_0; + break; + case FAN_CLASS_CONTENT: + group->priority = FS_PRIO_1; + break; + case FAN_CLASS_PRE_CONTENT: + group->priority = FS_PRIO_2; + break; + default: + fd = -EINVAL; + goto out_put_group; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) @@ -719,6 +733,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, ret = -EINVAL; if (unlikely(filp->f_op != &fanotify_fops)) goto fput_and_out; + group = filp->private_data; + + /* + * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not + * allowed to set permissions events. + */ + ret = -EINVAL; + if (mask & FAN_ALL_PERM_EVENTS && + group->priority == FS_PRIO_0) + goto fput_and_out; ret = fanotify_find_path(dfd, pathname, &path, flags); if (ret) @@ -729,7 +753,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, inode = path.dentry->d_inode; else mnt = path.mnt; - group = filp->private_data; /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 63531a6b4d2a..2c89ce7b644e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,16 @@ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 -#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3
From 2868201965419b9011f3f07fd80e765181343cb1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: use __aligned_u64 in fanotify userspace metadata Currently the userspace struct exposed by fanotify uses __attribute__((packed)) to make sure that alignment works on multiarch platforms.
Since this causes a severe performance penalty on some platforms we are going to switch to using explicit alignment notation on the 64bit values so we don't have to use 'packed' Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 2c89ce7b644e..8a621c1a0991 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -79,10 +79,10 @@ struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __u64 mask; + __aligned_u64 mask; __s32 fd; __s32 pid; -} __attribute__ ((packed)); +}; struct fanotify_response { __s32 fd; -- cgit v1.2.3 From ff8bcbd03da881bf1171910c6c07d44bd3c0a234 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: correctly handle return codes from listeners When fsnotify groups return errors they are ignored. For permissions events these should be passed back up the stack, but for most events these should continue to be ignored. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 20 ++++++++++++-------- include/linux/fsnotify_backend.h | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4498a208df94..57ecadd85abf 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -252,20 +252,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, + data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, + data_is, cookie, file_name, &event); inode_group = NULL; } else { - send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, file_name, - &event); + ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, + mask, data, data_is, cookie, file_name, + &event); } + if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) + goto out; + if (inode_group) inode_node = srcu_dereference(inode_node->next, &fsnotify_mark_srcu); @@ -273,7 +276,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_node = srcu_dereference(vfsmount_node->next, &fsnotify_mark_srcu); } - + ret = 0; +out: srcu_read_unlock(&fsnotify_mark_srcu, idx); /* * fsnotify_create_event() took a reference so the event can't be cleaned diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 825329534162..026892187c83 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -64,6 +64,8 @@ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) + #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ -- cgit v1.2.3 From 52420392c81c8712f555e6bcd116d8bd214ce43a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: call fsnotify_parent in perm events fsnotify perm events do not call fsnotify parent. 
That means you cannot register a perm event on a directory and enforce permissions on all inodes in that directory. This patch fixes that situation. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 15 +++++++++------ include/linux/fsnotify.h | 9 +++++++-- include/linux/fsnotify_backend.h | 8 +++++--- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 57ecadd85abf..20dc218707ca 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Notify this dentry's parent about a child's events. */ -void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { struct dentry *parent; struct inode *p_inode; + int ret = 0; if (!dentry) dentry = path->dentry; if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) - return; + return 0; parent = dget_parent(dentry); p_inode = parent->d_inode; @@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) mask |= FS_EVENT_ON_CHILD; if (path) - fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, + dentry->d_name.name, 0); else - fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); } dput(parent); + + return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 59d0df43ff9d..5059faacceab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,12 +26,12 @@ static inline void fsnotify_d_instantiate(struct dentry *dentry, } /* Notify this dentry's parent about a child's events. 
*/ -static inline void fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; - __fsnotify_parent(path, dentry, mask); + return __fsnotify_parent(path, dentry, mask); } /* simple call site for access decisions */ @@ -40,6 +40,7 @@ static inline int fsnotify_perm(struct file *file, int mask) struct path *path = &file->f_path; struct inode *inode = path->dentry->d_inode; __u32 fsnotify_mask = 0; + int ret; if (file->f_mode & FMODE_NONOTIFY) return 0; @@ -52,6 +53,10 @@ static inline int fsnotify_perm(struct file *file, int mask) else BUG(); + ret = fsnotify_parent(path, NULL, fsnotify_mask); + if (ret) + return ret; + return fsnotify(inode, fsnotify_mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 026892187c83..b37f3a71a9dc 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -304,7 +304,7 @@ struct fsnotify_mark { /* main fsnotify call to send events */ extern int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const unsigned char *name, u32 cookie); -extern void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask); +extern int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern u32 fsnotify_get_cookie(void); @@ -433,8 +433,10 @@ static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int da return 0; } -static inline void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) -{} +static inline int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +{ + return 0; +} static inline void __fsnotify_inode_delete(struct inode *inode) {} -- cgit v1.2.3
From bbf2aba50f6ed7c8dd53623fa1437b539928ac39 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to flush all marks fanotify is supposed to be able to flush all marks. This is mostly useful for the AV community to flush all cached decisions on a security policy change. This functionality has existed in the kernel but wasn't correctly exposed to userspace. Signed-off-by: Eric Paris --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 8a621c1a0991..a97c96d28c07 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -52,7 +52,8 @@ FAN_MARK_ONLYDIR |\ FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ - FAN_MARK_IGNORED_SURV_MODIFY) + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3
From 2529a0df0f64dab1f60ae08e038b89c53a6b4c02 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fsnotify: implement a default maximum queue depth Currently fanotify has no maximum queue depth. Since fanotify is CAP_SYS_ADMIN only this does not pose a normal user DoS issue, but it certainly is possible that a fanotify listener which can't keep up could OOM the box. This patch implements a default 16k depth.
This is the same default depth used by inotify, but given fanotify's better queue merging, in many situations this queue will contain many additional useful events by comparison. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++++ include/linux/fanotify.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b265936e92d6..04f2fe47b66a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,8 @@ #include +#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 + extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; @@ -689,6 +691,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group;
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a97c96d28c07..ed479b6fef7b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -12,7 +12,6 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ -/* FIXME currently Q's have no limit.... */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ -- cgit v1.2.3
From 5dd03f55fd2f21916ce248bb2e68bbfb39d94fe5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to override max queue depth fanotify has a default max queue depth. This patch allows processes which explicitly request it to have an 'unlimited' queue depth. These processes need to be very careful to make sure they cannot fall far enough behind that they OOM the box. Thus this flag is gated on CAP_SYS_ADMIN.
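In practice a listener might opt in to the deeper queue and degrade gracefully when the flag is refused; a sketch under the assumption that unprivileged fanotify_init() eventually becomes possible, as the changelog above anticipates:

/* Sketch: request an unlimited event queue, falling back to the
 * bounded default queue when the kernel refuses. Illustrative only. */
#include <errno.h>
#include <fcntl.h>
#include <sys/fanotify.h>

static int open_fanotify_group(void)
{
	int fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_NOTIF |
			       FAN_UNLIMITED_QUEUE, O_RDONLY);

	/* Without CAP_SYS_ADMIN the kernel rejects FAN_UNLIMITED_QUEUE
	 * with EPERM; retry with the default (16384-event) queue. */
	if (fd < 0 && errno == EPERM)
		fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_NOTIF, O_RDONLY);
	return fd;
}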
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 04f2fe47b66a..43d66d9b2eff 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -691,7 +691,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } - group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + if (flags & FAN_UNLIMITED_QUEUE) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->max_events = UINT_MAX; + } else { + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index ed479b6fef7b..e37f559c95e1 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -28,12 +28,13 @@ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 - #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) +#define FAN_UNLIMITED_QUEUE 0x00000010 + #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From e7099d8a5a34d2876908a9fab4952dabdcfc5909 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: limit the number of marks in a single fanotify group There is currently no limit on the number of marks a given fanotify group can have. Since fanotify is gated on CAP_SYS_ADMIN this was not seen as a serious DoS threat. This patch implements a default of 8192, the same as inotify to work towards removing the CAP_SYS_ADMIN gating and eliminating the default DoS'able status. 
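
With the limit in place, fanotify_mark() can fail with -ENOSPC once a group already holds 8192 marks; callers may want to report that case distinctly (a sketch; watch_path() is a hypothetical helper, not part of this patch):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/fanotify.h>

	static int watch_path(int fanotify_fd, const char *path)
	{
		int ret = fanotify_mark(fanotify_fd, FAN_MARK_ADD,
					FAN_OPEN | FAN_CLOSE, AT_FDCWD, path);

		if (ret < 0 && errno == ENOSPC)
			fprintf(stderr, "mark limit reached at %s\n", path);
		return ret;
	}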
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 +++++++++ include/linux/fsnotify_backend.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 43d66d9b2eff..1d33d7db277a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,7 @@ #include #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 +#define FANOTIFY_DEFAULT_MAX_MARKS 8192 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -584,6 +585,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -626,6 +630,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -700,6 +707,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b37f3a71a9dc..49ceed6e92b1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -169,6 +169,7 @@ struct fsnotify_group { bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; + unsigned int max_marks; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; -- cgit v1.2.3 From ac7e22dcfafd04c842a02057afd6541c1d613ef9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: allow userspace to override max marks Some fanotify groups, especially those like AV scanners, will need to place lots of marks, particularly ignore marks. Since ignore marks do not pin inodes in cache and are cleared if the inode is removed from core (usually under memory pressure), we expose an interface for listeners, with CAP_SYS_ADMIN, to override the maximum number of marks and be allowed to set an 'unlimited' number of marks. Programs which make use of this feature will be able to OOM a machine.
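
A cache-heavy listener such as an AV scanner might then combine the new init flag with ignore marks, roughly as below (a sketch only; the fd is assumed to come from fanotify_init(FAN_CLASS_CONTENT | FAN_UNLIMITED_MARKS, O_RDONLY), and the helper name is hypothetical):

	#include <fcntl.h>
	#include <sys/fanotify.h>

	/* Cache a "clean" verdict by ignoring further open-perm events on
	 * one file.  Ignore marks are dropped when the inode falls out of
	 * core, which is why such listeners can need far more than the
	 * default 8192 marks. */
	static int cache_clean_verdict(int fanotify_fd, const char *path)
	{
		return fanotify_mark(fanotify_fd,
				     FAN_MARK_ADD | FAN_MARK_IGNORED_MASK |
				     FAN_MARK_IGNORED_SURV_MODIFY,
				     FAN_OPEN_PERM, AT_FDCWD, path);
	}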
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1d33d7db277a..f9216102b426 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -707,7 +707,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } - group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + if (flags & FAN_UNLIMITED_MARKS) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->fanotify_data.max_marks = UINT_MAX; + } else { + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e37f559c95e1..7592a366a57b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -32,9 +32,11 @@ FAN_CLASS_PRE_CONTENT) #define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From 4afeff8505cb8a38e36c1ef2bd3447c4b8f87367 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: limit number of listeners per user fanotify currently has no limit on the number of listeners a given user can have open. This patch limits the total number of listeners per user to 128. This is the same as the inotify default limit. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 11 ++++++++++- fs/notify/fanotify/fanotify_user.c | 11 +++++++++++ include/linux/fsnotify_backend.h | 1 + include/linux/sched.h | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 85366c78cc37..60c11c306fd9 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -200,10 +200,19 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, return false; } +static void fanotify_free_group_priv(struct fsnotify_group *group) +{ + struct user_struct *user; + + user = group->fanotify_data.user; + atomic_dec(&user->fanotify_listeners); + free_uid(user); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .should_send_event = fanotify_should_send_event, - .free_group_priv = NULL, + .free_group_priv = fanotify_free_group_priv, .free_event_priv = NULL, .freeing_mark = NULL, }; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f9216102b426..a7d9369482d5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -18,6 +18,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_DEFAULT_MAX_MARKS 8192 +#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -656,6 +657,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct fsnotify_group *group; int f_flags, fd; + struct user_struct *user; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -666,6 +668,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; + user = get_current_user(); + if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { + free_uid(user); + return -EMFILE; + } + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; @@ -677,6 +685,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (IS_ERR(group)) return PTR_ERR(group); + group->fanotify_data.user = user; + atomic_inc(&user->fanotify_listeners); + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS mutex_init(&group->fanotify_data.access_mutex); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 49ceed6e92b1..4366f458a86a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -170,6 +170,7 @@ struct fsnotify_group { #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; + struct user_struct *user; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index be7adb7588e5..6f420baf37ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -672,6 +672,9 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? 
*/ #endif +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif #ifdef CONFIG_EPOLL atomic_t epoll_watches; /* The number of file descriptors currently watched */ #endif -- cgit v1.2.3 From b29866aab8489487f11cc4506590ac31bdbae22a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fsnotify: rename FS_IN_ISDIR to FS_ISDIR The _IN_ in the naming is reserved for flags only used by inotify. Since I am about to use this flag for fanotify rename it to be generic like the rest. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 2 +- include/linux/fsnotify.h | 20 ++++++++++---------- include/linux/fsnotify_backend.h | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 24edc1185d53..444c305a468c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -862,7 +862,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); - BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 5059faacceab..ecb43b33d181 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -98,8 +98,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, old_dir_mask |= FS_DN_RENAME; if (isdir) { - old_dir_mask |= FS_IN_ISDIR; - new_dir_mask |= FS_IN_ISDIR; + old_dir_mask |= FS_ISDIR; + new_dir_mask |= FS_ISDIR; } fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); @@ -137,7 +137,7 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) __u32 mask = FS_DELETE; if (isdir) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); } @@ -179,7 +179,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_IN_ISDIR); + __u32 mask = (FS_CREATE | FS_ISDIR); struct inode *d_inode = dentry->d_inode; audit_inode_child(dentry, inode); @@ -197,7 +197,7 @@ static inline void fsnotify_access(struct file *file) __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -215,7 +215,7 @@ static inline void fsnotify_modify(struct file *file) __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -233,7 +233,7 @@ static inline void fsnotify_open(struct file *file) __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -252,7 +252,7 @@ static inline void fsnotify_close(struct file *file) __u32 mask = (mode & FMODE_WRITE) ? 
FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -269,7 +269,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); @@ -304,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (mask) { if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4366f458a86a..b36041e9cd34 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -45,7 +45,7 @@ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_DN_RENAME 0x10000000 /* file renamed */ @@ -72,7 +72,7 @@ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_IN_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) struct fsnotify_group; -- cgit v1.2.3 From 8fcd65280abc4699510f1853ede31f43e8a3783a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: ignore events on directories unless specifically requested fanotify has a very limited number of events it sends on directories. The usefulness of these events is yet to be seen, and still we send them. This is particularly painful for mount marks where one might receive many of these useless events. As such, this patch will drop events on S_ISDIR() inodes unless they were explicitly requested with FAN_ONDIR.
This means that a mark on a directory without FAN_EVENT_ON_CHILD or FAN_ONDIR is meaningless and will result in no events ever (although it will still be allowed since detecting it is hard). Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 5 +++++ fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ include/linux/fanotify.h | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 8d98e1f5817b..b04f88eed09e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); + BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -195,6 +196,10 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, BUG(); } + if (S_ISDIR(path->dentry->d_inode->i_mode) && + (marks_ignored_mask & FS_ISDIR)) + return false; + if (event_mask & marks_mask & ~marks_ignored_mask) return true; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a7d9369482d5..ff1a908c9708 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -570,6 +570,12 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } + + if (!(flags & FAN_MARK_ONDIR)) { + __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + } + spin_unlock(&fsn_mark->lock); return mask & ~oldmask; @@ -766,6 +772,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, default: return -EINVAL; } + + if (mask & FAN_ONDIR) { + flags |= FAN_MARK_ONDIR; + mask &= ~FAN_ONDIR; + } + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) #else diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 7592a366a57b..5e0400a80c33 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -10,13 +10,15 @@ #define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ - #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_ONDIR 0x40000000 /* event occurred against dir */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ + /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ @@ -47,6 +49,10 @@ #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 +#ifdef __KERNEL__ +/* not valid from userspace, only kernel internal */ +#define FAN_MARK_ONDIR 0x00000100 +#endif #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ -- cgit v1.2.3 From 50e4a98914de13c6f38f50fd1afa06e2c18b3cf7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: Fix FAN_CLOSE comments The comments for FAN_CLOSE_WRITE and FAN_CLOSE_NOWRITE do not match FS_CLOSE_WRITE and FS_CLOSE_NOWRITE,
respectively. WRITE is for writable files while NOWRITE is for non-writable files. Signed-off-by: Stefan Hajnoczi Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 5e0400a80c33..0f0121467fc4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -6,8 +6,8 @@ /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ -#define FAN_CLOSE_WRITE 0x00000008 /* Unwrittable file closed */ -#define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From d8c0fca68da25ca3df534dfb12ce628675c828e4 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fsnotify: remove alignment padding from fsnotify_mark on 64 bit builds Reorder struct fsnotify_mark to remove 8 bytes of alignment padding on 64 bit builds. Shrinks fsnotify_mark to 128 bytes allowing more objects per slab in its kmem_cache and reduces the number of cachelines needed for each structure. Signed-off-by: Richard Kennedy Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b36041e9cd34..0a68f924f06f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -287,8 +287,8 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - __u32 ignored_mask; /* events types to ignore */ struct list_head free_g_list; /* tmp list used when freeing this mark */ + __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 #define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 -- cgit v1.2.3 From 202f4f53e503ae09b431459131b5b3a99fa6d839 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Fri, 20 Aug 2010 14:43:56 +0900 Subject: MAX8952 PMIC Driver Initial Release MAX8952 PMIC is used to provide voltage output between 770mV and 1400mV with DVS support. In this initial release, users can set voltages for four DVS modes, RAMP delay values, and SYNC frequency. Controlling FPWM/SYNC_MODE/Pull-Down/Ramp Modes and reading CHIP_ID is not supported in this release. If GPIO of EN is not valid in platform data, the driver assumes that it is always-on. If GPIO of VID0 or VID1 is invalid, the driver pulls down VID0 and VID1 to fix DVS mode as 0 and disables DVS support. We assume that V_OUT is capable of providing every voltage from 770mV to 1.40V in 10mV steps although the data sheet has some ambiguity about it.
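
A board file would then describe the chip roughly as follows (a sketch only: the GPIO numbers, I2C address, rail name, and DVS voltages are placeholders taken from no real board; the array would be handed to i2c_register_board_info() at machine init):

	#include <linux/i2c.h>
	#include <linux/init.h>
	#include <linux/regulator/machine.h>
	#include <linux/regulator/max8952.h>

	static struct max8952_platform_data board_max8952_pdata = {
		.gpio_en	= 14,	/* placeholder GPIO numbers */
		.gpio_vid0	= 15,
		.gpio_vid1	= 16,
		.default_mode	= MAX8952_DVS_MODE0,
		.dvs_mode	= {
			[MAX8952_DVS_MODE0] = MAX8952_DVS_1100mV,
			[MAX8952_DVS_MODE1] = MAX8952_DVS_1200mV,
			[MAX8952_DVS_MODE2] = MAX8952_DVS_1000mV,
			[MAX8952_DVS_MODE3] = MAX8952_DVS_950mV,
		},
		.sync_freq	= MAX8952_SYNC_FREQ_26MHZ,
		.ramp_speed	= MAX8952_RAMP_32mV_us,
		.reg_data	= {
			.constraints = {
				.name		= "VDD_ARM",
				.min_uV		= 770000,
				.max_uV		= 1400000,
				.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
				.boot_on	= 1,
			},
		},
	};

	static struct i2c_board_info board_pmic_devs[] __initdata = {
		{
			I2C_BOARD_INFO("max8952", 0x60),
			.platform_data = &board_max8952_pdata,
		},
	};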
Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Acked-by: Mark Brown -- v2: - Style correction - Can accept platform_data with invalid GPIOs - Removed unnecessary features - Improved error handling Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 8 + drivers/regulator/Makefile | 1 + drivers/regulator/max8952.c | 360 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/max8952.h | 135 ++++++++++++++ 4 files changed, 504 insertions(+) create mode 100644 drivers/regulator/max8952.c create mode 100644 include/linux/regulator/max8952.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 172951bf23a4..4889caabc632 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -100,6 +100,14 @@ config REGULATOR_MAX8925 help Say y here to support the voltage regulaltor of Maxim MAX8925 PMIC. +config REGULATOR_MAX8952 + tristate "Maxim MAX8952 Power Management IC" + depends on I2C + help + This driver controls a Maxim 8952 voltage output regulator + via I2C bus. Maxim 8952 has one voltage output and supports 4 DVS + modes ranging from 0.77V to 1.40V by 0.01V steps. + config REGULATOR_MAX8998 tristate "Maxim 8998 voltage regulator" depends on MFD_MAX8998 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 8285fd832e16..beff6da6eff6 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o +obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c new file mode 100644 index 000000000000..f2af0b1c3925 --- /dev/null +++ b/drivers/regulator/max8952.c @@ -0,0 +1,360 @@ +/* + * max8952.c - Voltage and current regulation for the Maxim 8952 + * + * Copyright (C) 2010 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +enum { + MAX8952_REG_MODE0, + MAX8952_REG_MODE1, + MAX8952_REG_MODE2, + MAX8952_REG_MODE3, + MAX8952_REG_CONTROL, + MAX8952_REG_SYNC, + MAX8952_REG_RAMP, + MAX8952_REG_CHIP_ID1, + MAX8952_REG_CHIP_ID2, +}; + +struct max8952_data { + struct i2c_client *client; + struct device *dev; + struct mutex mutex; + struct max8952_platform_data *pdata; + struct regulator_dev *rdev; + + bool vid0; + bool vid1; + bool en; +}; + +static int max8952_read_reg(struct max8952_data *max8952, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(max8952->client, reg); + if (ret > 0) + ret &= 0xff; + + return ret; +} + +static int max8952_write_reg(struct max8952_data *max8952, + u8 reg, u8 value) +{ + return i2c_smbus_write_byte_data(max8952->client, reg, value); +} + +static int max8952_voltage(struct max8952_data *max8952, u8 mode) +{ + return (max8952->pdata->dvs_mode[mode] * 10 + 770) * 1000; +} + +static int max8952_list_voltage(struct regulator_dev *rdev, + unsigned int selector) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + if (rdev_get_id(rdev) != 0) + return -EINVAL; + + return max8952_voltage(max8952, selector); +} + +static int max8952_is_enabled(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + return max8952->en; +} + +static int max8952_enable(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + /* If not valid, assume "ALWAYS_HIGH" */ + if (gpio_is_valid(max8952->pdata->gpio_en)) + gpio_set_value(max8952->pdata->gpio_en, 1); + + max8952->en = true; + return 0; +} + +static int max8952_disable(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + /* If not valid, assume "ALWAYS_HIGH" -> not permitted */ + if (gpio_is_valid(max8952->pdata->gpio_en)) + gpio_set_value(max8952->pdata->gpio_en, 0); + else + return -EPERM; + + max8952->en = false; + return 0; +} + +static int max8952_get_voltage(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + u8 vid = 0; + + if (max8952->vid0) + vid += 1; + if (max8952->vid1) + vid += 2; + + return max8952_voltage(max8952, vid); +} + +static int max8952_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + u8 vid = -1, i; + + if (!gpio_is_valid(max8952->pdata->gpio_vid0) || + !gpio_is_valid(max8952->pdata->gpio_vid0)) { + /* DVS not supported */ + return -EPERM; + } + + for (i = 0; i < MAX8952_NUM_DVS_MODE; i++) { + int volt = max8952_voltage(max8952, i); + + /* Set the voltage as low as possible within the range */ + if (volt <= max_uV && volt >= min_uV) + if (vid == -1 || max8952_voltage(max8952, vid) > volt) + vid = i; + } + + if (vid >= 0 && vid < MAX8952_NUM_DVS_MODE) { + max8952->vid0 = (vid % 2 == 1); + max8952->vid1 = (((vid >> 1) % 2) == 1); + gpio_set_value(max8952->pdata->gpio_vid0, max8952->vid0); + gpio_set_value(max8952->pdata->gpio_vid1, max8952->vid1); + } else + return -EINVAL; + + return 0; +} + +static struct regulator_ops max8952_ops = { + .list_voltage = max8952_list_voltage, + .is_enabled = max8952_is_enabled, + .enable = max8952_enable, + .disable = 
max8952_disable, + .get_voltage = max8952_get_voltage, + .set_voltage = max8952_set_voltage, + .set_suspend_disable = max8952_disable, +}; + +static struct regulator_desc regulator = { + .name = "MAX8952_VOUT", + .id = 0, + .n_voltages = MAX8952_NUM_DVS_MODE, + .ops = &max8952_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; + +static int __devinit max8952_pmic_probe(struct i2c_client *client, + const struct i2c_device_id *i2c_id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct max8952_platform_data *pdata = client->dev.platform_data; + struct max8952_data *max8952; + + int ret = 0, err = 0; + + if (!pdata) { + dev_err(&client->dev, "Require the platform data\n"); + return -EINVAL; + } + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + max8952 = kzalloc(sizeof(struct max8952_data), GFP_KERNEL); + if (!max8952) + return -ENOMEM; + + max8952->client = client; + max8952->dev = &client->dev; + max8952->pdata = pdata; + mutex_init(&max8952->mutex); + + max8952->rdev = regulator_register(®ulator, max8952->dev, + &pdata->reg_data, max8952); + + ret = IS_ERR(max8952->rdev); + if (ret) + dev_err(max8952->dev, "regulator init failed (%d)\n", ret); + + max8952->en = !!(pdata->reg_data.constraints.boot_on); + max8952->vid0 = (pdata->default_mode % 2) == 1; + max8952->vid1 = ((pdata->default_mode >> 1) % 2) == 1; + + if (gpio_is_valid(pdata->gpio_en)) { + if (!gpio_request(pdata->gpio_en, "MAX8952 EN")) + gpio_direction_output(pdata->gpio_en, max8952->en); + else + err = 1; + } else + err = 2; + + if (err) { + dev_info(max8952->dev, "EN gpio invalid: assume that EN" + "is always High\n"); + max8952->en = 1; + pdata->gpio_en = -1; /* Mark invalid */ + } + + err = 0; + + if (gpio_is_valid(pdata->gpio_vid0) && + gpio_is_valid(pdata->gpio_vid1)) { + if (!gpio_request(pdata->gpio_vid0, "MAX8952 VID0")) + gpio_direction_output(pdata->gpio_vid0, + (pdata->default_mode) % 2); + else + err = 1; + + if (!gpio_request(pdata->gpio_vid1, "MAX8952 VID1")) + gpio_direction_output(pdata->gpio_vid1, + (pdata->default_mode >> 1) % 2); + else { + if (!err) + gpio_free(pdata->gpio_vid0); + err = 2; + } + + } else + err = 3; + + if (err) { + dev_warn(max8952->dev, "VID0/1 gpio invalid: " + "DVS not avilable.\n"); + max8952->vid0 = 0; + max8952->vid1 = 0; + /* Mark invalid */ + pdata->gpio_vid0 = -1; + pdata->gpio_vid1 = -1; + + /* Disable Pulldown of EN only */ + max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x60); + + dev_err(max8952->dev, "DVS modes disabled because VID0 and VID1" + " do not have proper controls.\n"); + } else { + /* + * Disable Pulldown on EN, VID0, VID1 to reduce + * leakage current of MAX8952 assuming that MAX8952 + * is turned on (EN==1). Note that without having VID0/1 + * properly connected, turning pulldown off can be + * problematic. Thus, turn this off only when they are + * controllable by GPIO. 
+ */ + max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x0); + } + + max8952_write_reg(max8952, MAX8952_REG_MODE0, + (max8952_read_reg(max8952, + MAX8952_REG_MODE0) & 0xC0) | + (pdata->dvs_mode[0] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE1, + (max8952_read_reg(max8952, + MAX8952_REG_MODE1) & 0xC0) | + (pdata->dvs_mode[1] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE2, + (max8952_read_reg(max8952, + MAX8952_REG_MODE2) & 0xC0) | + (pdata->dvs_mode[2] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE3, + (max8952_read_reg(max8952, + MAX8952_REG_MODE3) & 0xC0) | + (pdata->dvs_mode[3] & 0x3F)); + + max8952_write_reg(max8952, MAX8952_REG_SYNC, + (max8952_read_reg(max8952, MAX8952_REG_SYNC) & 0x3F) | + ((pdata->sync_freq & 0x3) << 6)); + max8952_write_reg(max8952, MAX8952_REG_RAMP, + (max8952_read_reg(max8952, MAX8952_REG_RAMP) & 0x1F) | + ((pdata->ramp_speed & 0x7) << 5)); + + i2c_set_clientdata(client, max8952); + + return ret; +} + +static int __devexit max8952_pmic_remove(struct i2c_client *client) +{ + struct max8952_data *max8952 = i2c_get_clientdata(client); + struct max8952_platform_data *pdata = max8952->pdata; + struct regulator_dev *rdev = max8952->rdev; + + regulator_unregister(rdev); + + gpio_free(pdata->gpio_vid0); + gpio_free(pdata->gpio_vid1); + gpio_free(pdata->gpio_en); + + kfree(max8952); + return 0; +} + +static const struct i2c_device_id max8952_ids[] = { + { "max8952", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, max8952_ids); + +static struct i2c_driver max8952_pmic_driver = { + .probe = max8952_pmic_probe, + .remove = __devexit_p(max8952_pmic_remove), + .driver = { + .name = "max8952", + }, + .id_table = max8952_ids, +}; + +static int __init max8952_pmic_init(void) +{ + return i2c_add_driver(&max8952_pmic_driver); +} +subsys_initcall(max8952_pmic_init); + +static void __exit max8952_pmic_exit(void) +{ + i2c_del_driver(&max8952_pmic_driver); +} +module_exit(max8952_pmic_exit); + +MODULE_DESCRIPTION("MAXIM 8952 voltage regulator driver"); +MODULE_AUTHOR("MyungJoo Ham "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h new file mode 100644 index 000000000000..45e42855ad05 --- /dev/null +++ b/include/linux/regulator/max8952.h @@ -0,0 +1,135 @@ +/* + * max8952.h - Voltage regulation for the Maxim 8952 + * + * Copyright (C) 2010 Samsung Electrnoics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef REGULATOR_MAX8952 +#define REGULATOR_MAX8952 + +#include + +enum { + MAX8952_DVS_MODE0, + MAX8952_DVS_MODE1, + MAX8952_DVS_MODE2, + MAX8952_DVS_MODE3, +}; + +enum { + MAX8952_DVS_770mV = 0, + MAX8952_DVS_780mV, + MAX8952_DVS_790mV, + MAX8952_DVS_800mV, + MAX8952_DVS_810mV, + MAX8952_DVS_820mV, + MAX8952_DVS_830mV, + MAX8952_DVS_840mV, + MAX8952_DVS_850mV, + MAX8952_DVS_860mV, + MAX8952_DVS_870mV, + MAX8952_DVS_880mV, + MAX8952_DVS_890mV, + MAX8952_DVS_900mV, + MAX8952_DVS_910mV, + MAX8952_DVS_920mV, + MAX8952_DVS_930mV, + MAX8952_DVS_940mV, + MAX8952_DVS_950mV, + MAX8952_DVS_960mV, + MAX8952_DVS_970mV, + MAX8952_DVS_980mV, + MAX8952_DVS_990mV, + MAX8952_DVS_1000mV, + MAX8952_DVS_1010mV, + MAX8952_DVS_1020mV, + MAX8952_DVS_1030mV, + MAX8952_DVS_1040mV, + MAX8952_DVS_1050mV, + MAX8952_DVS_1060mV, + MAX8952_DVS_1070mV, + MAX8952_DVS_1080mV, + MAX8952_DVS_1090mV, + MAX8952_DVS_1100mV, + MAX8952_DVS_1110mV, + MAX8952_DVS_1120mV, + MAX8952_DVS_1130mV, + MAX8952_DVS_1140mV, + MAX8952_DVS_1150mV, + MAX8952_DVS_1160mV, + MAX8952_DVS_1170mV, + MAX8952_DVS_1180mV, + MAX8952_DVS_1190mV, + MAX8952_DVS_1200mV, + MAX8952_DVS_1210mV, + MAX8952_DVS_1220mV, + MAX8952_DVS_1230mV, + MAX8952_DVS_1240mV, + MAX8952_DVS_1250mV, + MAX8952_DVS_1260mV, + MAX8952_DVS_1270mV, + MAX8952_DVS_1280mV, + MAX8952_DVS_1290mV, + MAX8952_DVS_1300mV, + MAX8952_DVS_1310mV, + MAX8952_DVS_1320mV, + MAX8952_DVS_1330mV, + MAX8952_DVS_1340mV, + MAX8952_DVS_1350mV, + MAX8952_DVS_1360mV, + MAX8952_DVS_1370mV, + MAX8952_DVS_1380mV, + MAX8952_DVS_1390mV, + MAX8952_DVS_1400mV, +}; + +enum { + MAX8952_SYNC_FREQ_26MHZ, /* Default */ + MAX8952_SYNC_FREQ_13MHZ, + MAX8952_SYNC_FREQ_19_2MHZ, +}; + +enum { + MAX8952_RAMP_32mV_us = 0, /* Default */ + MAX8952_RAMP_16mV_us, + MAX8952_RAMP_8mV_us, + MAX8952_RAMP_4mV_us, + MAX8952_RAMP_2mV_us, + MAX8952_RAMP_1mV_us, + MAX8952_RAMP_0_5mV_us, + MAX8952_RAMP_0_25mV_us, +}; + +#define MAX8952_NUM_DVS_MODE 4 + +struct max8952_platform_data { + int gpio_vid0; + int gpio_vid1; + int gpio_en; + + u8 default_mode; + u8 dvs_mode[MAX8952_NUM_DVS_MODE]; /* MAX8952_DVS_MODEx_XXXXmV */ + + u8 sync_freq; + u8 ramp_speed; + + struct regulator_init_data reg_data; +}; + + +#endif /* REGULATOR_MAX8952 */ -- cgit v1.2.3 From 5976f0959d5251ae5b4db848eaa2f42a19e98652 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 16 Sep 2010 16:48:49 +0800 Subject: Regulator: LP3972 PMIC regulator driver This patch adds regulator drivers for National Semiconductors LP3972 PMIC. This LP3972 PMIC controller has 3 DC/DC voltage converters and 5 low drop-out (LDO) regulators. LP3972 PMIC controller uses I2C interface. 
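
Wiring it up from a board file follows the usual subdev pattern (a sketch; the rail name, constraints, and I2C address are placeholders, and LDO1's range matches the driver's ldo1_voltage_map):

	#include <linux/i2c.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/regulator/machine.h>
	#include <linux/regulator/lp3972.h>

	static struct regulator_init_data board_lp3972_ldo1 = {
		.constraints = {
			.name		= "VDD_LDO1",	/* placeholder rail */
			.min_uV		= 1700000,
			.max_uV		= 2000000,
			.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
		},
	};

	static struct lp3972_regulator_subdev board_lp3972_regs[] = {
		{ .id = LP3972_LDO1, .initdata = &board_lp3972_ldo1 },
	};

	static struct lp3972_platform_data board_lp3972_pdata = {
		.num_regulators	= ARRAY_SIZE(board_lp3972_regs),
		.regulators	= board_lp3972_regs,
	};

	static struct i2c_board_info board_pmic_i2c[] __initdata = {
		{
			I2C_BOARD_INFO("lp3972", 0x34),	/* placeholder address */
			.platform_data = &board_lp3972_pdata,
		},
	};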
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/lp3972.c | 649 +++++++++++++++++++++++++++++++++++++++ include/linux/regulator/lp3972.h | 48 +++ 4 files changed, 705 insertions(+) create mode 100644 drivers/regulator/lp3972.c create mode 100644 include/linux/regulator/lp3972.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 4889caabc632..dd30e883d4a7 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -172,6 +172,13 @@ config REGULATOR_LP3971 Say Y here to support the voltage regulators and convertors on National Semiconductors LP3971 PMIC +config REGULATOR_LP3972 + tristate "National Semiconductors LP3972 PMIC regulator driver" + depends on I2C + help + Say Y here to support the voltage regulators and convertors + on National Semiconductors LP3972 PMIC + config REGULATOR_PCAP tristate "PCAP2 regulator driver" depends on EZX_PCAP diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index beff6da6eff6..6cae6419b8b1 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o obj-$(CONFIG_REGULATOR_DUMMY) += dummy.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o +obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o diff --git a/drivers/regulator/lp3972.c b/drivers/regulator/lp3972.c new file mode 100644 index 000000000000..0d9b47569c1b --- /dev/null +++ b/drivers/regulator/lp3972.c @@ -0,0 +1,649 @@ +/* + * Regulator driver for National Semiconductors LP3972 PMIC chip + * + * Based on lp3971.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct lp3972 { + struct device *dev; + struct mutex io_lock; + struct i2c_client *i2c; + int num_regulators; + struct regulator_dev **rdev; +}; + +/* LP3972 Control Registers */ +#define LP3972_SCR_REG 0x07 +#define LP3972_OVER1_REG 0x10 +#define LP3972_OVSR1_REG 0x11 +#define LP3972_OVER2_REG 0x12 +#define LP3972_OVSR2_REG 0x13 +#define LP3972_VCC1_REG 0x20 +#define LP3972_ADTV1_REG 0x23 +#define LP3972_ADTV2_REG 0x24 +#define LP3972_AVRC_REG 0x25 +#define LP3972_CDTC1_REG 0x26 +#define LP3972_CDTC2_REG 0x27 +#define LP3972_SDTV1_REG 0x29 +#define LP3972_SDTV2_REG 0x2A +#define LP3972_MDTV1_REG 0x32 +#define LP3972_MDTV2_REG 0x33 +#define LP3972_L2VCR_REG 0x39 +#define LP3972_L34VCR_REG 0x3A +#define LP3972_SCR1_REG 0x80 +#define LP3972_SCR2_REG 0x81 +#define LP3972_OEN3_REG 0x82 +#define LP3972_OSR3_REG 0x83 +#define LP3972_LOER4_REG 0x84 +#define LP3972_B2TV_REG 0x85 +#define LP3972_B3TV_REG 0x86 +#define LP3972_B32RC_REG 0x87 +#define LP3972_ISRA_REG 0x88 +#define LP3972_BCCR_REG 0x89 +#define LP3972_II1RR_REG 0x8E +#define LP3972_II2RR_REG 0x8F + +#define LP3972_SYS_CONTROL1_REG LP3972_SCR1_REG +/* System control register 1 initial value, + * bits 5, 6 and 7 are EPROM programmable */ +#define SYS_CONTROL1_INIT_VAL 0x02 +#define SYS_CONTROL1_INIT_MASK 0x1F + +#define LP3972_VOL_CHANGE_REG LP3972_VCC1_REG +#define LP3972_VOL_CHANGE_FLAG_GO 0x01 +#define LP3972_VOL_CHANGE_FLAG_MASK 0x03 + +/* LDO output enable mask */ +#define LP3972_OEN3_L1EN BIT(0) +#define LP3972_OVER2_LDO2_EN BIT(2) +#define LP3972_OVER2_LDO3_EN BIT(3) +#define LP3972_OVER2_LDO4_EN BIT(4) +#define LP3972_OVER1_S_EN BIT(2) + +static const int ldo1_voltage_map[] = { + 1700, 1725, 1750, 1775, 1800, 1825, 1850, 1875, + 1900, 1925, 1950, 1975, 2000, +}; + +static const int ldo23_voltage_map[] = { + 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, + 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, +}; + +static const int ldo4_voltage_map[] = { + 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, + 1400, 1500, 1800, 1900, 2500, 2800, 3000, 3300, +}; + +static const int ldo5_voltage_map[] = { + 0, 0, 0, 0, 0, 850, 875, 900, + 925, 950, 975, 1000, 1025, 1050, 1075, 1100, + 1125, 1150, 1175, 1200, 1225, 1250, 1275, 1300, + 1325, 1350, 1375, 1400, 1425, 1450, 1475, 1500, +}; + +static const int buck1_voltage_map[] = { + 725, 750, 775, 800, 825, 850, 875, 900, + 925, 950, 975, 1000, 1025, 1050, 1075, 1100, + 1125, 1150, 1175, 1200, 1225, 1250, 1275, 1300, + 1325, 1350, 1375, 1400, 1425, 1450, 1475, 1500, +}; + +static const int buck23_voltage_map[] = { + 0, 800, 850, 900, 950, 1000, 1050, 1100, + 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, + 1550, 1600, 1650, 1700, 1800, 1900, 2500, 2800, + 3000, 3300, +}; + +static const int *ldo_voltage_map[] = { + ldo1_voltage_map, + ldo23_voltage_map, + ldo23_voltage_map, + ldo4_voltage_map, + ldo5_voltage_map, +}; + +static const int *buck_voltage_map[] = { + buck1_voltage_map, + buck23_voltage_map, + buck23_voltage_map, +}; + +static const int ldo_output_enable_mask[] = { + LP3972_OEN3_L1EN, + LP3972_OVER2_LDO2_EN, + LP3972_OVER2_LDO3_EN, + LP3972_OVER2_LDO4_EN, + LP3972_OVER1_S_EN, +}; + +static const int ldo_output_enable_addr[] = { + LP3972_OEN3_REG, + LP3972_OVER2_REG, + LP3972_OVER2_REG, + LP3972_OVER2_REG, + LP3972_OVER1_REG, +}; + +static const int ldo_vol_ctl_addr[] = { + LP3972_MDTV1_REG, + LP3972_L2VCR_REG, + LP3972_L34VCR_REG, + LP3972_L34VCR_REG, + LP3972_SDTV1_REG, +}; + +static const int 
buck_vol_enable_addr[] = { + LP3972_OVER1_REG, + LP3972_OEN3_REG, + LP3972_OEN3_REG, +}; + +static const int buck_base_addr[] = { + LP3972_ADTV1_REG, + LP3972_B2TV_REG, + LP3972_B3TV_REG, +}; + +#define LP3972_LDO_VOL_VALUE_MAP(x) (ldo_voltage_map[x]) +#define LP3972_LDO_OUTPUT_ENABLE_MASK(x) (ldo_output_enable_mask[x]) +#define LP3972_LDO_OUTPUT_ENABLE_REG(x) (ldo_output_enable_addr[x]) + +/* LDO voltage control registers shift: + LP3972_LDO1 -> 0, LP3972_LDO2 -> 4 + LP3972_LDO3 -> 0, LP3972_LDO4 -> 4 + LP3972_LDO5 -> 0 +*/ +#define LP3972_LDO_VOL_CONTR_SHIFT(x) (((x) & 1) << 2) +#define LP3972_LDO_VOL_CONTR_REG(x) (ldo_vol_ctl_addr[x]) +#define LP3972_LDO_VOL_CHANGE_SHIFT(x) ((x) ? 4 : 6) + +#define LP3972_LDO_VOL_MASK(x) (((x) % 4) ? 0x0f : 0x1f) +#define LP3972_LDO_VOL_MIN_IDX(x) (((x) == 4) ? 0x05 : 0x00) +#define LP3972_LDO_VOL_MAX_IDX(x) ((x) ? (((x) == 4) ? 0x1f : 0x0f) : 0x0c) + +#define LP3972_BUCK_VOL_VALUE_MAP(x) (buck_voltage_map[x]) +#define LP3972_BUCK_VOL_ENABLE_REG(x) (buck_vol_enable_addr[x]) +#define LP3972_BUCK_VOL1_REG(x) (buck_base_addr[x]) +#define LP3972_BUCK_VOL_MASK 0x1f +#define LP3972_BUCK_VOL_MIN_IDX(x) ((x) ? 0x01 : 0x00) +#define LP3972_BUCK_VOL_MAX_IDX(x) ((x) ? 0x19 : 0x1f) + +static int lp3972_i2c_read(struct i2c_client *i2c, char reg, int count, + u16 *dest) +{ + int ret; + + if (count != 1) + return -EIO; + ret = i2c_smbus_read_byte_data(i2c, reg); + if (ret < 0) + return -EIO; + + *dest = ret; + return 0; +} + +static int lp3972_i2c_write(struct i2c_client *i2c, char reg, int count, + const u16 *src) +{ + if (count != 1) + return -EIO; + return i2c_smbus_write_byte_data(i2c, reg, *src); +} + +static u8 lp3972_reg_read(struct lp3972 *lp3972, u8 reg) +{ + u16 val = 0; + + mutex_lock(&lp3972->io_lock); + + lp3972_i2c_read(lp3972->i2c, reg, 1, &val); + + dev_dbg(lp3972->dev, "reg read 0x%02x -> 0x%02x\n", (int)reg, + (unsigned)val&0xff); + + mutex_unlock(&lp3972->io_lock); + + return val & 0xff; +} + +static int lp3972_set_bits(struct lp3972 *lp3972, u8 reg, u16 mask, u16 val) +{ + u16 tmp; + int ret; + + mutex_lock(&lp3972->io_lock); + + ret = lp3972_i2c_read(lp3972->i2c, reg, 1, &tmp); + tmp = (tmp & ~mask) | val; + if (ret == 0) { + ret = lp3972_i2c_write(lp3972->i2c, reg, 1, &tmp); + dev_dbg(lp3972->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg, + (unsigned)val&0xff); + } + mutex_unlock(&lp3972->io_lock); + + return ret; +} + +static int lp3972_ldo_list_voltage(struct regulator_dev *dev, unsigned index) +{ + int ldo = rdev_get_id(dev) - LP3972_LDO1; + return 1000 * LP3972_LDO_VOL_VALUE_MAP(ldo)[index]; +} + +static int lp3972_ldo_is_enabled(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + u16 val; + + val = lp3972_reg_read(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo)); + return !!(val & mask); +} + +static int lp3972_ldo_enable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + + return lp3972_set_bits(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo), + mask, mask); +} + +static int lp3972_ldo_disable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + + return lp3972_set_bits(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo), + mask, 0); +} + +static int lp3972_ldo_get_voltage(struct regulator_dev *dev) +{ 
+ struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_VOL_MASK(ldo); + u16 val, reg; + + reg = lp3972_reg_read(lp3972, LP3972_LDO_VOL_CONTR_REG(ldo)); + val = (reg >> LP3972_LDO_VOL_CONTR_SHIFT(ldo)) & mask; + + return 1000 * LP3972_LDO_VOL_VALUE_MAP(ldo)[val]; +} + +static int lp3972_ldo_set_voltage(struct regulator_dev *dev, + int min_uV, int max_uV) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + int min_vol = min_uV / 1000, max_vol = max_uV / 1000; + const int *vol_map = LP3972_LDO_VOL_VALUE_MAP(ldo); + u16 val; + int shift, ret; + + if (min_vol < vol_map[LP3972_LDO_VOL_MIN_IDX(ldo)] || + min_vol > vol_map[LP3972_LDO_VOL_MAX_IDX(ldo)]) + return -EINVAL; + + for (val = LP3972_LDO_VOL_MIN_IDX(ldo); + val <= LP3972_LDO_VOL_MAX_IDX(ldo); val++) + if (vol_map[val] >= min_vol) + break; + + if (val > LP3972_LDO_VOL_MAX_IDX(ldo) || vol_map[val] > max_vol) + return -EINVAL; + + shift = LP3972_LDO_VOL_CONTR_SHIFT(ldo); + ret = lp3972_set_bits(lp3972, LP3972_LDO_VOL_CONTR_REG(ldo), + LP3972_LDO_VOL_MASK(ldo) << shift, val << shift); + + if (ret) + return ret; + + switch (ldo) { + case LP3972_LDO1: + case LP3972_LDO5: + shift = LP3972_LDO_VOL_CHANGE_SHIFT(ldo); + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK << shift, + LP3972_VOL_CHANGE_FLAG_GO << shift); + if (ret) + return ret; + + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK << shift, 0); + break; + } + + return ret; +} + +static struct regulator_ops lp3972_ldo_ops = { + .list_voltage = lp3972_ldo_list_voltage, + .is_enabled = lp3972_ldo_is_enabled, + .enable = lp3972_ldo_enable, + .disable = lp3972_ldo_disable, + .get_voltage = lp3972_ldo_get_voltage, + .set_voltage = lp3972_ldo_set_voltage, +}; + +static int lp3972_dcdc_list_voltage(struct regulator_dev *dev, unsigned index) +{ + int buck = rdev_get_id(dev) - LP3972_DCDC1; + return 1000 * buck_voltage_map[buck][index]; +} + +static int lp3972_dcdc_is_enabled(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_reg_read(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck)); + return !!(val & mask); +} + +static int lp3972_dcdc_enable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_set_bits(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck), + mask, mask); + return val; +} + +static int lp3972_dcdc_disable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_set_bits(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck), + mask, 0); + return val; +} + +static int lp3972_dcdc_get_voltage(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 reg; + int val; + + reg = lp3972_reg_read(lp3972, LP3972_BUCK_VOL1_REG(buck)); + reg &= LP3972_BUCK_VOL_MASK; + if (reg <= LP3972_BUCK_VOL_MAX_IDX(buck)) + val = 1000 * buck_voltage_map[buck][reg]; + else { + val = 0; + dev_warn(&dev->dev, "chip reported incorrect voltage value.\n"); + } + + return val; +} + +static int lp3972_dcdc_set_voltage(struct regulator_dev *dev, + int min_uV, int max_uV) +{ + struct lp3972 *lp3972 = 
rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + int min_vol = min_uV / 1000, max_vol = max_uV / 1000; + const int *vol_map = buck_voltage_map[buck]; + u16 val; + int ret; + + if (min_vol < vol_map[LP3972_BUCK_VOL_MIN_IDX(buck)] || + min_vol > vol_map[LP3972_BUCK_VOL_MAX_IDX(buck)]) + return -EINVAL; + + for (val = LP3972_BUCK_VOL_MIN_IDX(buck); + val <= LP3972_BUCK_VOL_MAX_IDX(buck); val++) + if (vol_map[val] >= min_vol) + break; + + if (val > LP3972_BUCK_VOL_MAX_IDX(buck) || + vol_map[val] > max_vol) + return -EINVAL; + + ret = lp3972_set_bits(lp3972, LP3972_BUCK_VOL1_REG(buck), + LP3972_BUCK_VOL_MASK, val); + if (ret) + return ret; + + if (buck != 0) + return ret; + + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK, LP3972_VOL_CHANGE_FLAG_GO); + if (ret) + return ret; + + return lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK, 0); +} + +static struct regulator_ops lp3972_dcdc_ops = { + .list_voltage = lp3972_dcdc_list_voltage, + .is_enabled = lp3972_dcdc_is_enabled, + .enable = lp3972_dcdc_enable, + .disable = lp3972_dcdc_disable, + .get_voltage = lp3972_dcdc_get_voltage, + .set_voltage = lp3972_dcdc_set_voltage, +}; + +static struct regulator_desc regulators[] = { + { + .name = "LDO1", + .id = LP3972_LDO1, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo1_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = LP3972_LDO2, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO3", + .id = LP3972_LDO3, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO4", + .id = LP3972_LDO4, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo4_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO5", + .id = LP3972_LDO5, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo5_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC1", + .id = LP3972_DCDC1, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck1_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC2", + .id = LP3972_DCDC2, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC3", + .id = LP3972_DCDC3, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, +}; + +static int setup_regulators(struct lp3972 *lp3972, + struct lp3972_platform_data *pdata) +{ + int i, err; + + lp3972->num_regulators = pdata->num_regulators; + lp3972->rdev = kcalloc(pdata->num_regulators, + sizeof(struct regulator_dev *), GFP_KERNEL); + if (!lp3972->rdev) { + err = -ENOMEM; + goto err_nomem; + } + + /* Instantiate the regulators */ + for (i = 0; i < pdata->num_regulators; i++) { + struct lp3972_regulator_subdev *reg = &pdata->regulators[i]; + lp3972->rdev[i] = regulator_register(®ulators[reg->id], + lp3972->dev, reg->initdata, lp3972); + + if (IS_ERR(lp3972->rdev[i])) { + err = PTR_ERR(lp3972->rdev[i]); + dev_err(lp3972->dev, "regulator init failed: %d\n", + err); + goto error; + } + } + + return 0; +error: + while (--i >= 0) + regulator_unregister(lp3972->rdev[i]); + kfree(lp3972->rdev); + lp3972->rdev = NULL; +err_nomem: + return err; +} + 
+static int __devinit lp3972_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct lp3972 *lp3972; + struct lp3972_platform_data *pdata = i2c->dev.platform_data; + int ret; + u16 val; + + if (!pdata) { + dev_dbg(&i2c->dev, "No platform init data supplied\n"); + return -ENODEV; + } + + lp3972 = kzalloc(sizeof(struct lp3972), GFP_KERNEL); + if (!lp3972) + return -ENOMEM; + + lp3972->i2c = i2c; + lp3972->dev = &i2c->dev; + + mutex_init(&lp3972->io_lock); + + /* Detect LP3972 */ + ret = lp3972_i2c_read(i2c, LP3972_SYS_CONTROL1_REG, 1, &val); + if (ret == 0 && (val & SYS_CONTROL1_INIT_MASK) != SYS_CONTROL1_INIT_VAL) + ret = -ENODEV; + if (ret < 0) { + dev_err(&i2c->dev, "failed to detect device\n"); + goto err_detect; + } + + ret = setup_regulators(lp3972, pdata); + if (ret < 0) + goto err_detect; + + i2c_set_clientdata(i2c, lp3972); + return 0; + +err_detect: + kfree(lp3972); + return ret; +} + +static int __devexit lp3972_i2c_remove(struct i2c_client *i2c) +{ + struct lp3972 *lp3972 = i2c_get_clientdata(i2c); + int i; + + for (i = 0; i < lp3972->num_regulators; i++) + regulator_unregister(lp3972->rdev[i]); + kfree(lp3972->rdev); + kfree(lp3972); + + return 0; +} + +static const struct i2c_device_id lp3972_i2c_id[] = { + { "lp3972", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp3972_i2c_id); + +static struct i2c_driver lp3972_i2c_driver = { + .driver = { + .name = "lp3972", + .owner = THIS_MODULE, + }, + .probe = lp3972_i2c_probe, + .remove = __devexit_p(lp3972_i2c_remove), + .id_table = lp3972_i2c_id, +}; + +static int __init lp3972_module_init(void) +{ + return i2c_add_driver(&lp3972_i2c_driver); +} +subsys_initcall(lp3972_module_init); + +static void __exit lp3972_module_exit(void) +{ + i2c_del_driver(&lp3972_i2c_driver); +} +module_exit(lp3972_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Axel Lin "); +MODULE_DESCRIPTION("LP3972 PMIC driver"); diff --git a/include/linux/regulator/lp3972.h b/include/linux/regulator/lp3972.h new file mode 100644 index 000000000000..9bb7389b7a1e --- /dev/null +++ b/include/linux/regulator/lp3972.h @@ -0,0 +1,48 @@ +/* + * National Semiconductors LP3972 PMIC chip client interface + * + * Based on lp3971.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __LINUX_REGULATOR_LP3972_H +#define __LINUX_REGULATOR_LP3972_H + +#include + +#define LP3972_LDO1 0 +#define LP3972_LDO2 1 +#define LP3972_LDO3 2 +#define LP3972_LDO4 3 +#define LP3972_LDO5 4 + +#define LP3972_DCDC1 5 +#define LP3972_DCDC2 6 +#define LP3972_DCDC3 7 + +#define LP3972_NUM_REGULATORS 8 + +struct lp3972_regulator_subdev { + int id; + struct regulator_init_data *initdata; +}; + +struct lp3972_platform_data { + int num_regulators; + struct lp3972_regulator_subdev *regulators; +}; + +#endif -- cgit v1.2.3 From 688fe99a439f7c9dfcc52fbf7cb347f140a2dc8b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Oct 2010 19:18:32 -0700 Subject: regulator: Add option for machine drivers to enable the dummy regulator Allow machine drivers to explicitly enable the use of the dummy regulator, enabling simpler support for systems with only a few specific supplies visible to software. It is strongly recommended that this is not used on systems with substantial software control over their PMICs; for maximum functionality, constraints should be as fully specified as possible. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Makefile | 3 +-- drivers/regulator/core.c | 22 ++++++++++++++++++++++ include/linux/regulator/machine.h | 5 +++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 6cae6419b8b1..bff815736780 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -3,14 +3,13 @@ # -obj-$(CONFIG_REGULATOR) += core.o +obj-$(CONFIG_REGULATOR) += core.o dummy.o obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o -obj-$(CONFIG_REGULATOR_DUMMY) += dummy.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cc8b337b9119..4fa08c85f3ce 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -33,6 +33,7 @@ static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_list); static LIST_HEAD(regulator_map_list); static int has_full_constraints; +static bool board_wants_dummy_regulator; /* * struct regulator_map @@ -1108,6 +1109,11 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, } } + if (board_wants_dummy_regulator) { + rdev = dummy_regulator_rdev; + goto found; + } + #ifdef CONFIG_REGULATOR_DUMMY if (!devname) devname = "deviceless"; @@ -2462,6 +2468,22 @@ void regulator_has_full_constraints(void) } EXPORT_SYMBOL_GPL(regulator_has_full_constraints); +/** + * regulator_use_dummy_regulator - Provide a dummy regulator when none is found + * + * Calling this function will cause the regulator API to provide a + * dummy regulator to consumers if no physical regulator is found, + * allowing most consumers to proceed as though a regulator were + * configured. This allows systems such as those with software + * controllable regulators for the CPU core only to be brought up more + * readily.
+ */ +void regulator_use_dummy_regulator(void) +{ + board_wants_dummy_regulator = true; +} +EXPORT_SYMBOL_GPL(regulator_use_dummy_regulator); + /** * rdev_get_drvdata - get rdev regulator driver data * @rdev: regulator diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index e2980287245e..761c745b9c24 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -189,10 +189,15 @@ int regulator_suspend_prepare(suspend_state_t state); #ifdef CONFIG_REGULATOR void regulator_has_full_constraints(void); +void regulator_use_dummy_regulator(void); #else static inline void regulator_has_full_constraints(void) { } + +static inline void regulator_use_dummy_regulator(void) +{ +} #endif #endif -- cgit v1.2.3
From f337134ff0cfe60fb1e347bc45b8e7190ef90a82 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 Aug 2010 13:13:36 +0100 Subject: mfd: Move PCF50633 IRQ prototypes where the definitions can see them Fixed warnings about unprototyped global functions. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/pcf50633-core.c | 7 ------- include/linux/mfd/pcf50633/core.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index 23e585527285..6d4233f0d0d3 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -25,13 +25,6 @@ #include -int pcf50633_irq_init(struct pcf50633 *pcf, int irq); -void pcf50633_irq_free(struct pcf50633 *pcf); -#ifdef CONFIG_PM -int pcf50633_irq_suspend(struct pcf50633 *pcf); -int pcf50633_irq_resume(struct pcf50633 *pcf); -#endif - static int __pcf50633_read(struct pcf50633 *pcf, u8 reg, int num, u8 *data) { int ret; diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h index ad411a78870c..50d4a047118d 100644 --- a/include/linux/mfd/pcf50633/core.h +++ b/include/linux/mfd/pcf50633/core.h @@ -227,4 +227,11 @@ static inline struct pcf50633 *dev_to_pcf50633(struct device *dev) { return dev_get_drvdata(dev); } +int pcf50633_irq_init(struct pcf50633 *pcf, int irq); +void pcf50633_irq_free(struct pcf50633 *pcf); +#ifdef CONFIG_PM +int pcf50633_irq_suspend(struct pcf50633 *pcf); +int pcf50633_irq_resume(struct pcf50633 *pcf); +#endif + #endif -- cgit v1.2.3
From b8e9cf0b28173fc25dae9f3ac44de6fc4e9fc385 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 16 Aug 2010 17:14:44 +0200 Subject: gpio: Add bitmask to block requests to unavailable stmpe GPIOs GPIOs on these controllers are multi-functional. If you decide to use some of them, e.g. as input channels for the ADC, you surely don't want those pins to be reassigned as simple GPIOs (which could even be triggered from userspace via 'export'). The same applies to the touchscreen controller pins. Since knowledge about the hardware is needed to decide which GPIOs to reserve, let this bitmask live inside platform_data and provide some defines to assist potential users.
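By way of illustration, a board that dedicates the STMPE811 touch pins to the touchscreen could reserve them at registration time roughly like this (a minimal sketch against the platform_data added by this patch; the variable name is invented):

	/* Sketch: keep the STMPE811 touch pins away from gpiolib (and from
	 * userspace 'export') by masking them out in the platform data. */
	static struct stmpe_gpio_platform_data board_stmpe_gpio_data = {
		.gpio_base	= -1,	/* let gpiolib assign a base */
		.norequest_mask	= STMPE_GPIO_NOREQ_811_TOUCH,
	};

With this in place, any gpio_request() on a masked pin fails with -EINVAL in stmpe_gpio_request() below.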
Signed-off-by: Wolfram Sang Acked-by: Rabin Vincent Cc: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/gpio/stmpe-gpio.c | 5 +++++ include/linux/mfd/stmpe.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/gpio/stmpe-gpio.c b/drivers/gpio/stmpe-gpio.c index 4e1f1b9d5e67..65b996083918 100644 --- a/drivers/gpio/stmpe-gpio.c +++ b/drivers/gpio/stmpe-gpio.c @@ -30,6 +30,7 @@ struct stmpe_gpio { struct mutex irq_lock; int irq_base; + unsigned norequest_mask; /* Caches of interrupt control registers for bus_lock */ u8 regs[CACHE_NR_REGS][CACHE_NR_BANKS]; @@ -103,6 +104,9 @@ static int stmpe_gpio_request(struct gpio_chip *chip, unsigned offset) struct stmpe_gpio *stmpe_gpio = to_stmpe_gpio(chip); struct stmpe *stmpe = stmpe_gpio->stmpe; + if (stmpe_gpio->norequest_mask & (1 << offset)) + return -EINVAL; + return stmpe_set_altfunc(stmpe, 1 << offset, STMPE_BLOCK_GPIO); } @@ -302,6 +306,7 @@ static int __devinit stmpe_gpio_probe(struct platform_device *pdev) stmpe_gpio->dev = &pdev->dev; stmpe_gpio->stmpe = stmpe; + stmpe_gpio->norequest_mask = pdata ? pdata->norequest_mask : 0; stmpe_gpio->chip = template_chip; stmpe_gpio->chip.ngpio = stmpe->num_gpios; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 39ca7588659b..e762c270d8d4 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -112,13 +112,19 @@ struct stmpe_keypad_platform_data { bool no_autorepeat; }; +#define STMPE_GPIO_NOREQ_811_TOUCH (0xf0) + /** * struct stmpe_gpio_platform_data - STMPE GPIO platform data * @gpio_base: first gpio number assigned. A maximum of * %STMPE_NR_GPIOS GPIOs will be allocated. + * @norequest_mask: bitmask specifying which GPIOs should _not_ be + * requestable due to different usage (e.g. touch, keypad) + * STMPE_GPIO_NOREQ_* macros can be used here. */ struct stmpe_gpio_platform_data { int gpio_base; + unsigned norequest_mask; void (*setup)(struct stmpe *stmpe, unsigned gpio_base); void (*remove)(struct stmpe *stmpe, unsigned gpio_base); }; -- cgit v1.2.3
From 89712059c09ff12f1e60e444d05d2ca257dd00ef Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 10 Sep 2010 17:10:21 +0200 Subject: i2c: twl: add register defines for pm master module Some modules already need to talk to at least the PROTECT_KEY register; while at it, add defines for the entire register space.
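For example, with the defines added below, the usual PROTECT_KEY unlock/lock handshake reduces to something like this (a sketch only; error handling is elided, and note that twl_i2c_write_u8() takes the value before the register offset):

	/* Sketch: unlock the PM master registers, update one, lock again. */
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
			 TWL4030_PM_MASTER_PROTECT_KEY);
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
			 TWL4030_PM_MASTER_PROTECT_KEY);
	/* ... write the protected register(s), e.g. CFG_BOOT, here ... */
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
			 TWL4030_PM_MASTER_PROTECT_KEY);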
Signed-off-by: Felipe Balbi Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 4793d8a7f480..53089516c17a 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -357,6 +357,52 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset); /*----------------------------------------------------------------------*/ +/* + * PM Master module register offsets (use TWL4030_MODULE_PM_MASTER) + */ + +#define TWL4030_PM_MASTER_CFG_P1_TRANSITION 0x00 +#define TWL4030_PM_MASTER_CFG_P2_TRANSITION 0x01 +#define TWL4030_PM_MASTER_CFG_P3_TRANSITION 0x02 +#define TWL4030_PM_MASTER_CFG_P123_TRANSITION 0x03 +#define TWL4030_PM_MASTER_STS_BOOT 0x04 +#define TWL4030_PM_MASTER_CFG_BOOT 0x05 +#define TWL4030_PM_MASTER_SHUNDAN 0x06 +#define TWL4030_PM_MASTER_BOOT_BCI 0x07 +#define TWL4030_PM_MASTER_CFG_PWRANA1 0x08 +#define TWL4030_PM_MASTER_CFG_PWRANA2 0x09 +#define TWL4030_PM_MASTER_BACKUP_MISC_STS 0x0b +#define TWL4030_PM_MASTER_BACKUP_MISC_CFG 0x0c +#define TWL4030_PM_MASTER_BACKUP_MISC_TST 0x0d +#define TWL4030_PM_MASTER_PROTECT_KEY 0x0e +#define TWL4030_PM_MASTER_STS_HW_CONDITIONS 0x0f +#define TWL4030_PM_MASTER_P1_SW_EVENTS 0x10 +#define TWL4030_PM_MASTER_P2_SW_EVENTS 0x11 +#define TWL4030_PM_MASTER_P3_SW_EVENTS 0x12 +#define TWL4030_PM_MASTER_STS_P123_STATE 0x13 +#define TWL4030_PM_MASTER_PB_CFG 0x14 +#define TWL4030_PM_MASTER_PB_WORD_MSB 0x15 +#define TWL4030_PM_MASTER_PB_WORD_LSB 0x16 +#define TWL4030_PM_MASTER_SEQ_ADD_W2P 0x1c +#define TWL4030_PM_MASTER_SEQ_ADD_P2A 0x1d +#define TWL4030_PM_MASTER_SEQ_ADD_A2W 0x1e +#define TWL4030_PM_MASTER_SEQ_ADD_A2S 0x1f +#define TWL4030_PM_MASTER_SEQ_ADD_S2A12 0x20 +#define TWL4030_PM_MASTER_SEQ_ADD_S2A3 0x21 +#define TWL4030_PM_MASTER_SEQ_ADD_WARM 0x22 +#define TWL4030_PM_MASTER_MEMORY_ADDRESS 0x23 +#define TWL4030_PM_MASTER_MEMORY_DATA 0x24 + +#define TWL4030_PM_MASTER_KEY_CFG1 0xc0 +#define TWL4030_PM_MASTER_KEY_CFG2 0x0c + +#define TWL4030_PM_MASTER_KEY_TST1 0xe0 +#define TWL4030_PM_MASTER_KEY_TST2 0x0e + +#define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 + +/*----------------------------------------------------------------------*/ + /* Power bus message definitions */ /* The TWL4030/5030 splits its power-management resources (the various -- cgit v1.2.3
From 676e02d7a2ed9bb02994670a07df533a29a99de6 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 6 Aug 2010 11:28:06 +0900 Subject: mfd: Use i2c_client as an argument on MAX8998 i2c routines The MAX8998 chip has regulator and RTC features. The regulator and the RTC use different I2C slave addresses, so each I2C operation function needs to be passed the right i2c client. Also, this patch exports the I2C operation functions instead of using callbacks, to make the code easier to read.
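After this change a sub-driver only needs the right i2c_client to do register I/O, for instance (sketch; the register and bit chosen here are arbitrary):

	/* Sketch: read-modify-write from a MAX8998 platform sub-device. */
	struct max8998_dev *iodev = dev_get_drvdata(pdev->dev.parent);
	u8 val;
	int ret;

	ret = max8998_read_reg(iodev->i2c, MAX8998_REG_ONOFF1, &val);
	if (!ret)
		ret = max8998_update_reg(iodev->i2c, MAX8998_REG_ONOFF1,
					 1 << 7, 1 << 7);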
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 29 ++++++++++++++--------------- drivers/regulator/max8998.c | 17 +++++++++++------ include/linux/mfd/max8998-private.h | 30 +++++------------------------- 3 files changed, 30 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index f1c928915880..3b8a5076d351 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -36,13 +36,13 @@ static struct mfd_cell max8998_devs[] = { } }; -static int max8998_i2c_device_read(struct max8998_dev *max8998, u8 reg, u8 *dest) +int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_read_byte_data(client, reg); + ret = i2c_smbus_read_byte_data(i2c, reg); mutex_unlock(&max8998->iolock); if (ret < 0) return ret; @@ -51,36 +51,38 @@ static int max8998_i2c_device_read(struct max8998_dev *max8998, u8 reg, u8 *dest *dest = ret; return 0; } +EXPORT_SYMBOL(max8998_read_reg); -static int max8998_i2c_device_write(struct max8998_dev *max8998, u8 reg, u8 value) +int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_write_byte_data(client, reg, value); + ret = i2c_smbus_write_byte_data(i2c, reg, value); mutex_unlock(&max8998->iolock); return ret; } +EXPORT_SYMBOL(max8998_write_reg); -static int max8998_i2c_device_update(struct max8998_dev *max8998, u8 reg, - u8 val, u8 mask) +int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_read_byte_data(client, reg); + ret = i2c_smbus_read_byte_data(i2c, reg); if (ret >= 0) { u8 old_val = ret & 0xff; u8 new_val = (val & mask) | (old_val & (~mask)); - ret = i2c_smbus_write_byte_data(client, reg, new_val); + ret = i2c_smbus_write_byte_data(i2c, reg, new_val); if (ret >= 0) ret = 0; } mutex_unlock(&max8998->iolock); return ret; } +EXPORT_SYMBOL(max8998_update_reg); static int max8998_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) @@ -94,10 +96,7 @@ static int max8998_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, max8998); max8998->dev = &i2c->dev; - max8998->i2c_client = i2c; - max8998->dev_read = max8998_i2c_device_read; - max8998->dev_write = max8998_i2c_device_write; - max8998->dev_update = max8998_i2c_device_update; + max8998->i2c = i2c; mutex_init(&max8998->iolock); ret = mfd_add_devices(max8998->dev, -1, diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c index a1baf1fbe004..7f5fe6f198cf 100644 --- a/drivers/regulator/max8998.c +++ b/drivers/regulator/max8998.c @@ -173,6 +173,7 @@ static int max8998_get_enable_register(struct regulator_dev *rdev, static int max8998_ldo_is_enabled(struct regulator_dev *rdev) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int ret, reg, shift = 8; u8 val; @@ -180,7 +181,7 @@ static int max8998_ldo_is_enabled(struct regulator_dev *rdev) if (ret) return ret; - ret = max8998_read_reg(max8998->iodev, reg, &val); + ret = max8998_read_reg(i2c, reg, &val); if (ret) return ret; @@ -190,25 
+191,27 @@ static int max8998_ldo_is_enabled(struct regulator_dev *rdev) static int max8998_ldo_enable(struct regulator_dev *rdev) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int reg, shift = 8, ret; ret = max8998_get_enable_register(rdev, ®, &shift); if (ret) return ret; - return max8998_update_reg(max8998->iodev, reg, 1<iodev->i2c; int reg, shift = 8, ret; ret = max8998_get_enable_register(rdev, ®, &shift); if (ret) return ret; - return max8998_update_reg(max8998->iodev, reg, 0, 1<iodev->i2c; int reg, shift = 0, mask, ret; u8 val; @@ -283,7 +287,7 @@ static int max8998_get_voltage(struct regulator_dev *rdev) if (ret) return ret; - ret = max8998_read_reg(max8998->iodev, reg, &val); + ret = max8998_read_reg(i2c, reg, &val); if (ret) return ret; @@ -297,6 +301,7 @@ static int max8998_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int min_vol = min_uV / 1000, max_vol = max_uV / 1000; int previous_vol = 0; const struct voltage_map_desc *desc; @@ -330,14 +335,14 @@ static int max8998_set_voltage(struct regulator_dev *rdev, /* wait for RAMP_UP_DELAY if rdev is BUCK1/2 and * ENRAMP is ON */ if (ldo == MAX8998_BUCK1 || ldo == MAX8998_BUCK2) { - max8998_read_reg(max8998->iodev, MAX8998_REG_ONOFF4, &val); + max8998_read_reg(i2c, MAX8998_REG_ONOFF4, &val); if (val & (1 << 4)) { en_ramp = true; previous_vol = max8998_get_voltage(rdev); } } - ret = max8998_update_reg(max8998->iodev, reg, i<min + desc->step*i - previous_vol/1000; diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 6dc75b3e2d33..f0a20cdc288c 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -75,38 +75,18 @@ enum { /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) - * @i2c_client: i2c client private data - * @dev_read(): chip register read function - * @dev_write(): chip register write function - * @dev_update(): chip register update function + * @i2c: i2c client private data * @iolock: mutex for serializing io access */ struct max8998_dev { struct device *dev; - struct i2c_client *i2c_client; - int (*dev_read)(struct max8998_dev *max8998, u8 reg, u8 *dest); - int (*dev_write)(struct max8998_dev *max8998, u8 reg, u8 val); - int (*dev_update)(struct max8998_dev *max8998, u8 reg, u8 val, u8 mask); + struct i2c_client *i2c; struct mutex iolock; }; -static inline int max8998_read_reg(struct max8998_dev *max8998, u8 reg, - u8 *value) -{ - return max8998->dev_read(max8998, reg, value); -} - -static inline int max8998_write_reg(struct max8998_dev *max8998, u8 reg, - u8 value) -{ - return max8998->dev_write(max8998, reg, value); -} - -static inline int max8998_update_reg(struct max8998_dev *max8998, u8 reg, - u8 value, u8 mask) -{ - return max8998->dev_update(max8998, reg, value, mask); -} +extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); +extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); +extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); #endif /* __LINUX_MFD_MAX8998_PRIV_H */ -- cgit v1.2.3 From 2c7e6f5797140b33ec2b967ff28941e1c7eff4b2 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 10 Sep 2010 18:36:39 +0200 Subject: mfd: Add MAX8998 interrupts support Use genirq and provide seperated file for interrupts support. 
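Wired up from a board file, the result looks roughly like this (sketch; the IRQ numbers are invented and the handler is hypothetical, following the same request_threaded_irq() pattern the rtc driver later in this series uses):

	/* Sketch: board code reserves an IRQ window for the PMIC ... */
	static struct max8998_platform_data board_max8998_data = {
		.irq_base = BOARD_IRQ_BASE,	/* hypothetical base */
		.ono	  = BOARD_ONO_IRQ,	/* hypothetical ONO line */
	};

	/* ... and a sub-driver claims one of the demuxed sources. */
	ret = request_threaded_irq(max8998->irq_base + MAX8998_IRQ_LOBAT1,
				   NULL, lobat_irq_handler, 0,
				   "max8998-lobat1", info);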
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 2 +- drivers/mfd/Makefile | 2 +- drivers/mfd/max8998-irq.c | 255 ++++++++++++++++++++++++++++++++++++ drivers/mfd/max8998.c | 25 +++- include/linux/mfd/max8998-private.h | 72 +++++++++- include/linux/mfd/max8998.h | 11 +- 6 files changed, 358 insertions(+), 9 deletions(-) create mode 100644 drivers/mfd/max8998-irq.c (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 02cd6c0e59cb..8a463e6c22a0 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -295,7 +295,7 @@ config MFD_MAX8925 config MFD_MAX8998 bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" - depends on I2C=y + depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE help Say yes here to support for Maxim Semiconductor MAX8998 and diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index feaeeaeeddb7..b1ad74da2351 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o max8925-objs := max8925-core.o max8925-i2c.o obj-$(CONFIG_MFD_MAX8925) += max8925.o -obj-$(CONFIG_MFD_MAX8998) += max8998.o +obj-$(CONFIG_MFD_MAX8998) += max8998.o max8998-irq.o pcf50633-objs := pcf50633-core.o pcf50633-irq.o obj-$(CONFIG_MFD_PCF50633) += pcf50633.o diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c new file mode 100644 index 000000000000..346fd296cf7d --- /dev/null +++ b/drivers/mfd/max8998-irq.c @@ -0,0 +1,255 @@ +/* + * Interrupt controller support for MAX8998 + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include + +struct max8998_irq_data { + int reg; + int mask; +}; + +static struct max8998_irq_data max8998_irqs[] = { + [MAX8998_IRQ_DCINF] = { + .reg = 1, + .mask = MAX8998_IRQ_DCINF_MASK, + }, + [MAX8998_IRQ_DCINR] = { + .reg = 1, + .mask = MAX8998_IRQ_DCINR_MASK, + }, + [MAX8998_IRQ_JIGF] = { + .reg = 1, + .mask = MAX8998_IRQ_JIGF_MASK, + }, + [MAX8998_IRQ_JIGR] = { + .reg = 1, + .mask = MAX8998_IRQ_JIGR_MASK, + }, + [MAX8998_IRQ_PWRONF] = { + .reg = 1, + .mask = MAX8998_IRQ_PWRONF_MASK, + }, + [MAX8998_IRQ_PWRONR] = { + .reg = 1, + .mask = MAX8998_IRQ_PWRONR_MASK, + }, + [MAX8998_IRQ_WTSREVNT] = { + .reg = 2, + .mask = MAX8998_IRQ_WTSREVNT_MASK, + }, + [MAX8998_IRQ_SMPLEVNT] = { + .reg = 2, + .mask = MAX8998_IRQ_SMPLEVNT_MASK, + }, + [MAX8998_IRQ_ALARM1] = { + .reg = 2, + .mask = MAX8998_IRQ_ALARM1_MASK, + }, + [MAX8998_IRQ_ALARM0] = { + .reg = 2, + .mask = MAX8998_IRQ_ALARM0_MASK, + }, + [MAX8998_IRQ_ONKEY1S] = { + .reg = 3, + .mask = MAX8998_IRQ_ONKEY1S_MASK, + }, + [MAX8998_IRQ_TOPOFFR] = { + .reg = 3, + .mask = MAX8998_IRQ_TOPOFFR_MASK, + }, + [MAX8998_IRQ_DCINOVPR] = { + .reg = 3, + .mask = MAX8998_IRQ_DCINOVPR_MASK, + }, + [MAX8998_IRQ_CHGRSTF] = { + .reg = 3, + .mask = MAX8998_IRQ_CHGRSTF_MASK, + }, + [MAX8998_IRQ_DONER] = { + .reg = 3, + .mask = MAX8998_IRQ_DONER_MASK, + }, + [MAX8998_IRQ_CHGFAULT] = { + .reg = 3, + .mask = MAX8998_IRQ_CHGFAULT_MASK, + }, + [MAX8998_IRQ_LOBAT1] = { + .reg = 4, + .mask = MAX8998_IRQ_LOBAT1_MASK, + }, + [MAX8998_IRQ_LOBAT2] = { + .reg = 4, + .mask = MAX8998_IRQ_LOBAT2_MASK, + }, +}; + +static inline struct max8998_irq_data * +irq_to_max8998_irq(struct max8998_dev *max8998, int irq) +{ + return &max8998_irqs[irq - max8998->irq_base]; +} + +static void max8998_irq_lock(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + + mutex_lock(&max8998->irqlock); +} + +static void max8998_irq_sync_unlock(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + int i; + + for (i = 0; i < ARRAY_SIZE(max8998->irq_masks_cur); i++) { + /* + * If there's been a change in the mask write it back + * to the hardware. 
+ */ + if (max8998->irq_masks_cur[i] != max8998->irq_masks_cache[i]) { + max8998->irq_masks_cache[i] = max8998->irq_masks_cur[i]; + max8998_write_reg(max8998->i2c, MAX8998_REG_IRQM1 + i, + max8998->irq_masks_cur[i]); + } + } + + mutex_unlock(&max8998->irqlock); +} + +static void max8998_irq_unmask(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + struct max8998_irq_data *irq_data = irq_to_max8998_irq(max8998, irq); + + max8998->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask; +} + +static void max8998_irq_mask(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + struct max8998_irq_data *irq_data = irq_to_max8998_irq(max8998, irq); + + max8998->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask; +} + +static struct irq_chip max8998_irq_chip = { + .name = "max8998", + .bus_lock = max8998_irq_lock, + .bus_sync_unlock = max8998_irq_sync_unlock, + .mask = max8998_irq_mask, + .unmask = max8998_irq_unmask, +}; + +static irqreturn_t max8998_irq_thread(int irq, void *data) +{ + struct max8998_dev *max8998 = data; + u8 irq_reg[MAX8998_NUM_IRQ_REGS]; + int ret; + int i; + + ret = max8998_bulk_read(max8998->i2c, MAX8998_REG_IRQ1, + MAX8998_NUM_IRQ_REGS, irq_reg); + if (ret < 0) { + dev_err(max8998->dev, "Failed to read interrupt register: %d\n", + ret); + return IRQ_NONE; + } + + /* Apply masking */ + for (i = 0; i < MAX8998_NUM_IRQ_REGS; i++) + irq_reg[i] &= ~max8998->irq_masks_cur[i]; + + /* Report */ + for (i = 0; i < MAX8998_IRQ_NR; i++) { + if (irq_reg[max8998_irqs[i].reg - 1] & max8998_irqs[i].mask) + handle_nested_irq(max8998->irq_base + i); + } + + return IRQ_HANDLED; +} + +int max8998_irq_init(struct max8998_dev *max8998) +{ + int i; + int cur_irq; + int ret; + + if (!max8998->irq) { + dev_warn(max8998->dev, + "No interrupt specified, no interrupts\n"); + max8998->irq_base = 0; + return 0; + } + + if (!max8998->irq_base) { + dev_err(max8998->dev, + "No interrupt base specified, no interrupts\n"); + return 0; + } + + mutex_init(&max8998->irqlock); + + /* Mask the individual interrupt sources */ + for (i = 0; i < MAX8998_NUM_IRQ_REGS; i++) { + max8998->irq_masks_cur[i] = 0xff; + max8998->irq_masks_cache[i] = 0xff; + max8998_write_reg(max8998->i2c, MAX8998_REG_IRQM1 + i, 0xff); + } + + max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM1, 0xff); + max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM2, 0xff); + + /* register with genirq */ + for (i = 0; i < MAX8998_IRQ_NR; i++) { + cur_irq = i + max8998->irq_base; + set_irq_chip_data(cur_irq, max8998); + set_irq_chip_and_handler(cur_irq, &max8998_irq_chip, + handle_edge_irq); + set_irq_nested_thread(cur_irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + set_irq_noprobe(cur_irq); +#endif + } + + ret = request_threaded_irq(max8998->irq, NULL, max8998_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "max8998-irq", max8998); + if (ret) { + dev_err(max8998->dev, "Failed to request IRQ %d: %d\n", + max8998->irq, ret); + return ret; + } + + if (!max8998->ono) + return 0; + + ret = request_threaded_irq(max8998->ono, NULL, max8998_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | + IRQF_ONESHOT, "max8998-ono", max8998); + if (ret) + dev_err(max8998->dev, "Failed to request IRQ %d: %d\n", + max8998->ono, ret); + + return 0; +} + +void max8998_irq_exit(struct max8998_dev *max8998) +{ + if (max8998->irq) + free_irq(max8998->irq, max8998); +} diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 3b8a5076d351..49c140a78ba9 100644 --- 
a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -1,5 +1,5 @@ /* - * max8698.c - mfd core driver for the Maxim 8998 + * max8998.c - mfd core driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park @@ -53,6 +53,21 @@ int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) } EXPORT_SYMBOL(max8998_read_reg); +int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, u8 *buf) +{ + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&max8998->iolock); + ret = i2c_smbus_read_i2c_block_data(i2c, reg, count, buf); + mutex_unlock(&max8998->iolock); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(max8998_bulk_read); + int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) { struct max8998_dev *max8998 = i2c_get_clientdata(i2c); @@ -87,6 +102,7 @@ EXPORT_SYMBOL(max8998_update_reg); static int max8998_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + struct max8998_platform_data *pdata = i2c->dev.platform_data; struct max8998_dev *max8998; int ret = 0; @@ -97,8 +113,15 @@ static int max8998_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, max8998); max8998->dev = &i2c->dev; max8998->i2c = i2c; + max8998->irq = i2c->irq; + if (pdata) { + max8998->ono = pdata->ono; + max8998->irq_base = pdata->irq_base; + } mutex_init(&max8998->iolock); + max8998_irq_init(max8998); + ret = mfd_add_devices(max8998->dev, -1, max8998_devs, ARRAY_SIZE(max8998_devs), NULL, 0); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index f0a20cdc288c..3bd2371a05f7 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -1,5 +1,5 @@ /* - * max8698.h - Voltage regulator driver for the Maxim 8998 + * max8998.h - Voltage regulator driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electrnoics * Kyungmin Park @@ -23,6 +23,8 @@ #ifndef __LINUX_MFD_MAX8998_PRIV_H #define __LINUX_MFD_MAX8998_PRIV_H +#define MAX8998_NUM_IRQ_REGS 4 + /* MAX 8998 registers */ enum { MAX8998_REG_IRQ1, @@ -72,20 +74,86 @@ enum { MAX8998_REG_LBCNFG2, }; +/* IRQ definitions */ +enum { + MAX8998_IRQ_DCINF, + MAX8998_IRQ_DCINR, + MAX8998_IRQ_JIGF, + MAX8998_IRQ_JIGR, + MAX8998_IRQ_PWRONF, + MAX8998_IRQ_PWRONR, + + MAX8998_IRQ_WTSREVNT, + MAX8998_IRQ_SMPLEVNT, + MAX8998_IRQ_ALARM1, + MAX8998_IRQ_ALARM0, + + MAX8998_IRQ_ONKEY1S, + MAX8998_IRQ_TOPOFFR, + MAX8998_IRQ_DCINOVPR, + MAX8998_IRQ_CHGRSTF, + MAX8998_IRQ_DONER, + MAX8998_IRQ_CHGFAULT, + + MAX8998_IRQ_LOBAT1, + MAX8998_IRQ_LOBAT2, + + MAX8998_IRQ_NR, +}; + +#define MAX8998_IRQ_DCINF_MASK (1 << 2) +#define MAX8998_IRQ_DCINR_MASK (1 << 3) +#define MAX8998_IRQ_JIGF_MASK (1 << 4) +#define MAX8998_IRQ_JIGR_MASK (1 << 5) +#define MAX8998_IRQ_PWRONF_MASK (1 << 6) +#define MAX8998_IRQ_PWRONR_MASK (1 << 7) + +#define MAX8998_IRQ_WTSREVNT_MASK (1 << 0) +#define MAX8998_IRQ_SMPLEVNT_MASK (1 << 1) +#define MAX8998_IRQ_ALARM1_MASK (1 << 2) +#define MAX8998_IRQ_ALARM0_MASK (1 << 3) + +#define MAX8998_IRQ_ONKEY1S_MASK (1 << 0) +#define MAX8998_IRQ_TOPOFFR_MASK (1 << 2) +#define MAX8998_IRQ_DCINOVPR_MASK (1 << 3) +#define MAX8998_IRQ_CHGRSTF_MASK (1 << 4) +#define MAX8998_IRQ_DONER_MASK (1 << 5) +#define MAX8998_IRQ_CHGFAULT_MASK (1 << 7) + +#define MAX8998_IRQ_LOBAT1_MASK (1 << 0) +#define MAX8998_IRQ_LOBAT2_MASK (1 << 1) + /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) * @i2c: i2c client private data * @iolock: mutex 
for serializing io access + * @irqlock: mutex for buslock + * @irq_base: base IRQ number for max8998, required for IRQs + * @irq: generic IRQ number for max8998 + * @ono: power onoff IRQ number for max8998 + * @irq_masks_cur: currently active value + * @irq_masks_cache: cached hardware value */ - struct max8998_dev { struct device *dev; struct i2c_client *i2c; struct mutex iolock; + struct mutex irqlock; + + int irq_base; + int irq; + int ono; + u8 irq_masks_cur[MAX8998_NUM_IRQ_REGS]; + u8 irq_masks_cache[MAX8998_NUM_IRQ_REGS]; }; +int max8998_irq_init(struct max8998_dev *max8998); +void max8998_irq_exit(struct max8998_dev *max8998); + extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); +extern int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, + u8 *buf); extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index 1d3601a2d853..d47ed4c190fe 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -1,5 +1,5 @@ /* - * max8698.h - Voltage regulator driver for the Maxim 8998 + * max8998.h - Voltage regulator driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electrnoics * Kyungmin Park @@ -66,13 +66,16 @@ struct max8998_regulator_data { /** * struct max8998_board - packages regulator init data - * @num_regulators: number of regultors used * @regulators: array of defined regulators + * @num_regulators: number of regultors used + * @irq_base: base IRQ number for max8998, required for IRQs + * @ono: power onoff IRQ number for max8998 */ - struct max8998_platform_data { - int num_regulators; struct max8998_regulator_data *regulators; + int num_regulators; + int irq_base; + int ono; }; #endif /* __LINUX_MFD_MAX8998_H */ -- cgit v1.2.3
From 9b16c0a43b74393cc18666a7748293812c61af1f Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 6 Aug 2010 11:28:08 +0900 Subject: rtc: Add MAX8998 rtc driver This adds support for the RTC provided by the Maxim 8998 chip. This driver was tested on a GONI board by using the rtc-test application from Documentation/rtc.txt.
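Once bound, the device shows up as a regular /dev/rtcN node, so an rtc-test style smoke test boils down to (userspace sketch, assuming the chip is rtc0):

	/* Userspace sketch: read the wall clock via the RTC ioctl ABI. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/rtc.h>

	int main(void)
	{
		struct rtc_time tm;
		int fd = open("/dev/rtc0", O_RDONLY);

		if (fd < 0 || ioctl(fd, RTC_RD_TIME, &tm) < 0)
			return 1;
		printf("%04d-%02d-%02d %02d:%02d:%02d\n",
		       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		       tm.tm_hour, tm.tm_min, tm.tm_sec);
		return 0;
	}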
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Acked-by: Alessandro Zummo Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 24 ++- drivers/rtc/Kconfig | 10 ++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-max8998.c | 300 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/max8998-private.h | 6 +- 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 drivers/rtc/rtc-max8998.c (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 49c140a78ba9..310fd8054f35 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -30,10 +30,14 @@ #include #include +#define RTC_I2C_ADDR (0x0c >> 1) + static struct mfd_cell max8998_devs[] = { { .name = "max8998-pmic", - } + }, { + .name = "max8998-rtc", + }, }; int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) @@ -80,6 +84,21 @@ int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) } EXPORT_SYMBOL(max8998_write_reg); +int max8998_bulk_write(struct i2c_client *i2c, u8 reg, int count, u8 *buf) +{ + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&max8998->iolock); + ret = i2c_smbus_write_i2c_block_data(i2c, reg, count, buf); + mutex_unlock(&max8998->iolock); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(max8998_bulk_write); + int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask) { struct max8998_dev *max8998 = i2c_get_clientdata(i2c); @@ -120,6 +139,9 @@ static int max8998_i2c_probe(struct i2c_client *i2c, } mutex_init(&max8998->iolock); + max8998->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); + i2c_set_clientdata(max8998->rtc, max8998); + max8998_irq_init(max8998); ret = mfd_add_devices(max8998->dev, -1, diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6a77437d4f5a..23579d6e4a6e 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -196,6 +196,16 @@ config RTC_DRV_MAX8925 This driver can also be built as a module. If so, the module will be called rtc-max8925. +config RTC_DRV_MAX8998 + tristate "Maxim MAX8998" + depends on MFD_MAX8998 + help + If you say yes here you will get support for the + RTC of Maxim MAX8998 PMIC. + + This driver can also be built as a module. If so, the module + will be called rtc-max8998. + config RTC_DRV_RS5C372 tristate "Ricoh R2025S/D, RS5C372A/B, RV5C386, RV5C387A" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 7a7cb3228a1d..c3f3c2d9e1ed 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o obj-$(CONFIG_RTC_MXC) += rtc-mxc.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o +obj-$(CONFIG_RTC_DRV_MAX8998) += rtc-max8998.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o obj-$(CONFIG_RTC_DRV_MC13783) += rtc-mc13783.o obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c new file mode 100644 index 000000000000..f22dee35f330 --- /dev/null +++ b/drivers/rtc/rtc-max8998.c @@ -0,0 +1,300 @@ +/* + * RTC driver for Maxim MAX8998 + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Minkyu Kang + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX8998_RTC_SEC 0x00 +#define MAX8998_RTC_MIN 0x01 +#define MAX8998_RTC_HOUR 0x02 +#define MAX8998_RTC_WEEKDAY 0x03 +#define MAX8998_RTC_DATE 0x04 +#define MAX8998_RTC_MONTH 0x05 +#define MAX8998_RTC_YEAR1 0x06 +#define MAX8998_RTC_YEAR2 0x07 +#define MAX8998_ALARM0_SEC 0x08 +#define MAX8998_ALARM0_MIN 0x09 +#define MAX8998_ALARM0_HOUR 0x0a +#define MAX8998_ALARM0_WEEKDAY 0x0b +#define MAX8998_ALARM0_DATE 0x0c +#define MAX8998_ALARM0_MONTH 0x0d +#define MAX8998_ALARM0_YEAR1 0x0e +#define MAX8998_ALARM0_YEAR2 0x0f +#define MAX8998_ALARM1_SEC 0x10 +#define MAX8998_ALARM1_MIN 0x11 +#define MAX8998_ALARM1_HOUR 0x12 +#define MAX8998_ALARM1_WEEKDAY 0x13 +#define MAX8998_ALARM1_DATE 0x14 +#define MAX8998_ALARM1_MONTH 0x15 +#define MAX8998_ALARM1_YEAR1 0x16 +#define MAX8998_ALARM1_YEAR2 0x17 +#define MAX8998_ALARM0_CONF 0x18 +#define MAX8998_ALARM1_CONF 0x19 +#define MAX8998_RTC_STATUS 0x1a +#define MAX8998_WTSR_SMPL_CNTL 0x1b +#define MAX8998_TEST 0x1f + +#define HOUR_12 (1 << 7) +#define HOUR_PM (1 << 5) +#define ALARM0_STATUS (1 << 1) +#define ALARM1_STATUS (1 << 2) + +enum { + RTC_SEC = 0, + RTC_MIN, + RTC_HOUR, + RTC_WEEKDAY, + RTC_DATE, + RTC_MONTH, + RTC_YEAR1, + RTC_YEAR2, +}; + +struct max8998_rtc_info { + struct device *dev; + struct max8998_dev *max8998; + struct i2c_client *rtc; + struct rtc_device *rtc_dev; + int irq; +}; + +static void max8998_data_to_tm(u8 *data, struct rtc_time *tm) +{ + tm->tm_sec = bcd2bin(data[RTC_SEC]); + tm->tm_min = bcd2bin(data[RTC_MIN]); + if (data[RTC_HOUR] & HOUR_12) { + tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x1f); + if (data[RTC_HOUR] & HOUR_PM) + tm->tm_hour += 12; + } else + tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x3f); + + tm->tm_wday = data[RTC_WEEKDAY] & 0x07; + tm->tm_mday = bcd2bin(data[RTC_DATE]); + tm->tm_mon = bcd2bin(data[RTC_MONTH]); + tm->tm_year = bcd2bin(data[RTC_YEAR1]) + bcd2bin(data[RTC_YEAR2]) * 100; + tm->tm_year -= 1900; +} + +static void max8998_tm_to_data(struct rtc_time *tm, u8 *data) +{ + data[RTC_SEC] = bin2bcd(tm->tm_sec); + data[RTC_MIN] = bin2bcd(tm->tm_min); + data[RTC_HOUR] = bin2bcd(tm->tm_hour); + data[RTC_WEEKDAY] = tm->tm_wday; + data[RTC_DATE] = bin2bcd(tm->tm_mday); + data[RTC_MONTH] = bin2bcd(tm->tm_mon); + data[RTC_YEAR1] = bin2bcd(tm->tm_year % 100); + data[RTC_YEAR2] = bin2bcd((tm->tm_year + 1900) / 100); +} + +static int max8998_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + int ret; + + ret = max8998_bulk_read(info->rtc, MAX8998_RTC_SEC, 8, data); + if (ret < 0) + return ret; + + max8998_data_to_tm(data, tm); + + return rtc_valid_tm(tm); +} + +static int max8998_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + + max8998_tm_to_data(tm, data); + + return max8998_bulk_write(info->rtc, MAX8998_RTC_SEC, 8, data); +} + +static int max8998_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + u8 val; + int ret; + + ret = max8998_bulk_read(info->rtc, MAX8998_ALARM0_SEC, 8, data); + if (ret < 0) + return ret; + + max8998_data_to_tm(data, &alrm->time); + + ret = max8998_read_reg(info->rtc, MAX8998_ALARM0_CONF, &val); + if (ret < 0) + return ret; + + alrm->enabled = !!val; + + ret = max8998_read_reg(info->rtc, MAX8998_RTC_STATUS, &val); + if (ret < 0) + return ret; + + if (val & 
ALARM0_STATUS) + alrm->pending = 1; + else + alrm->pending = 0; + + return 0; +} + +static int max8998_rtc_stop_alarm(struct max8998_rtc_info *info) +{ + return max8998_write_reg(info->rtc, MAX8998_ALARM0_CONF, 0); +} + +static int max8998_rtc_start_alarm(struct max8998_rtc_info *info) +{ + return max8998_write_reg(info->rtc, MAX8998_ALARM0_CONF, 0x77); +} + +static int max8998_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + int ret; + + max8998_tm_to_data(&alrm->time, data); + + ret = max8998_rtc_stop_alarm(info); + if (ret < 0) + return ret; + + ret = max8998_bulk_write(info->rtc, MAX8998_ALARM0_SEC, 8, data); + if (ret < 0) + return ret; + + if (alrm->enabled) + return max8998_rtc_start_alarm(info); + + return 0; +} + +static int max8998_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + + if (enabled) + return max8998_rtc_start_alarm(info); + else + return max8998_rtc_stop_alarm(info); +} + +static irqreturn_t max8998_rtc_alarm_irq(int irq, void *data) +{ + struct max8998_rtc_info *info = data; + + rtc_update_irq(info->rtc_dev, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops max8998_rtc_ops = { + .read_time = max8998_rtc_read_time, + .set_time = max8998_rtc_set_time, + .read_alarm = max8998_rtc_read_alarm, + .set_alarm = max8998_rtc_set_alarm, + .alarm_irq_enable = max8998_rtc_alarm_irq_enable, +}; + +static int __devinit max8998_rtc_probe(struct platform_device *pdev) +{ + struct max8998_dev *max8998 = dev_get_drvdata(pdev->dev.parent); + struct max8998_rtc_info *info; + int ret; + + info = kzalloc(sizeof(struct max8998_rtc_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->dev = &pdev->dev; + info->max8998 = max8998; + info->rtc = max8998->rtc; + info->irq = max8998->irq_base + MAX8998_IRQ_ALARM0; + + info->rtc_dev = rtc_device_register("max8998-rtc", &pdev->dev, + &max8998_rtc_ops, THIS_MODULE); + + if (IS_ERR(info->rtc_dev)) { + ret = PTR_ERR(info->rtc_dev); + dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + goto out_rtc; + } + + platform_set_drvdata(pdev, info); + + ret = request_threaded_irq(info->irq, NULL, max8998_rtc_alarm_irq, 0, + "rtc-alarm0", info); + if (ret < 0) + dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", + info->irq, ret); + + return 0; + +out_rtc: + kfree(info); + return ret; +} + +static int __devexit max8998_rtc_remove(struct platform_device *pdev) +{ + struct max8998_rtc_info *info = platform_get_drvdata(pdev); + + if (info) { + free_irq(info->irq, info); + rtc_device_unregister(info->rtc_dev); + kfree(info); + } + + return 0; +} + +static struct platform_driver max8998_rtc_driver = { + .driver = { + .name = "max8998-rtc", + .owner = THIS_MODULE, + }, + .probe = max8998_rtc_probe, + .remove = __devexit_p(max8998_rtc_remove), +}; + +static int __init max8998_rtc_init(void) +{ + return platform_driver_register(&max8998_rtc_driver); +} +module_init(max8998_rtc_init); + +static void __exit max8998_rtc_exit(void) +{ + platform_driver_unregister(&max8998_rtc_driver); +} +module_exit(max8998_rtc_exit); + +MODULE_AUTHOR("Minkyu Kang "); +MODULE_AUTHOR("Joonyoung Shim "); +MODULE_DESCRIPTION("Maxim MAX8998 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 3bd2371a05f7..170f665c7cdd 100644 --- a/include/linux/mfd/max8998-private.h +++ 
b/include/linux/mfd/max8998-private.h @@ -126,7 +126,8 @@ enum { /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) - * @i2c: i2c client private data + * @i2c: i2c client private data for regulator + * @rtc: i2c client private data for rtc * @iolock: mutex for serializing io access * @irqlock: mutex for buslock * @irq_base: base IRQ number for max8998, required for IRQs @@ -138,6 +139,7 @@ enum { struct max8998_dev { struct device *dev; struct i2c_client *i2c; + struct i2c_client *rtc; struct mutex iolock; struct mutex irqlock; @@ -155,6 +157,8 @@ extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); extern int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, u8 *buf); extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); +extern int max8998_bulk_write(struct i2c_client *i2c, u8 reg, int count, + u8 *buf); extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); #endif /* __LINUX_MFD_MAX8998_PRIV_H */ -- cgit v1.2.3
From 19ca7502c508595edfb963e5dbcf62854a926506 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:26:59 +0200 Subject: mmc: Allow the tmio_mmc mfd driver to specify get_cd handler Some controllers supported by the tmio_mmc driver do not have the card detect pin of a slot connected, so polling needs to be used and card detection is handled by other means. This patch exposes a get_cd hook for that purpose. Signed-off-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mmc/host/tmio_mmc.c | 13 +++++++++++++ include/linux/mfd/tmio.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 69d98e3bf6ab..1a47221d01a4 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -756,10 +756,23 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) (sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)) ? 0 : 1; } +static int tmio_mmc_get_cd(struct mmc_host *mmc) +{ + struct tmio_mmc_host *host = mmc_priv(mmc); + struct mfd_cell *cell = host->pdev->dev.platform_data; + struct tmio_mmc_data *pdata = cell->driver_data; + + if (!pdata->get_cd) + return -ENOSYS; + else + return pdata->get_cd(host->pdev); +} + static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, + .get_cd = tmio_mmc_get_cd, }; #ifdef CONFIG_PM diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index f07425bc3dcd..24c43bbad541 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -74,6 +74,7 @@ struct tmio_mmc_data { struct tmio_mmc_dma *dma; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); + int (*get_cd)(struct platform_device *host); }; /* -- cgit v1.2.3
From 998283e2e359249133f2f47db26669a55ff25c98 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:27:00 +0200 Subject: mfd: Allow the platform to specify the sh_mobile_sdhi get_cd handler On some platforms (e.g. AP4EVB) the card detect pin of a slot is not directly connected to the sdhi hardware, so polling needs to be used with tmio_mmc and card detection is handled in the platform code. This patch allows the platform to set tmio_mmc capabilities (to pass the MMC_CAP_NEEDS_POLL flag) and exposes a get_cd hook for that purpose.
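Board code can then back the hook with, for instance, a GPIO (sketch; the function name, GPIO macro and active level are invented):

	/* Sketch: platform-level card detect for one SDHI channel. */
	static int board_sdhi1_get_cd(struct platform_device *pdev)
	{
		return !gpio_get_value(BOARD_SDHI1_CD_GPIO); /* active low */
	}

	static struct sh_mobile_sdhi_info sdhi1_info = {
		.tmio_caps = MMC_CAP_NEEDS_POLL,
		.get_cd	   = board_sdhi1_get_cd,
	};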
Signed-off-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mfd/sh_mobile_sdhi.c | 13 +++++++++++++ include/linux/mfd/sh_mobile_sdhi.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c index 49b4d069cbf9..01d83a41d570 100644 --- a/drivers/mfd/sh_mobile_sdhi.c +++ b/drivers/mfd/sh_mobile_sdhi.c @@ -65,6 +65,17 @@ static void sh_mobile_sdhi_set_pwr(struct platform_device *tmio, int state) p->set_pwr(pdev, state); } +static int sh_mobile_sdhi_get_cd(struct platform_device *tmio) +{ + struct platform_device *pdev = to_platform_device(tmio->dev.parent); + struct sh_mobile_sdhi_info *p = pdev->dev.platform_data; + + if (p && p->get_cd) + return p->get_cd(pdev); + else + return -ENOSYS; +} + static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) { struct sh_mobile_sdhi *priv; @@ -106,10 +117,12 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->hclk = clk_get_rate(priv->clk); mmc_data->set_pwr = sh_mobile_sdhi_set_pwr; + mmc_data->get_cd = sh_mobile_sdhi_get_cd; mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED; if (p) { mmc_data->flags = p->tmio_flags; mmc_data->ocr_mask = p->tmio_ocr_mask; + mmc_data->capabilities |= p->tmio_caps; } if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { diff --git a/include/linux/mfd/sh_mobile_sdhi.h b/include/linux/mfd/sh_mobile_sdhi.h index 49067802a6d7..c981b959760f 100644 --- a/include/linux/mfd/sh_mobile_sdhi.h +++ b/include/linux/mfd/sh_mobile_sdhi.h @@ -7,8 +7,10 @@ struct sh_mobile_sdhi_info { int dma_slave_tx; int dma_slave_rx; unsigned long tmio_flags; + unsigned long tmio_caps; u32 tmio_ocr_mask; /* available MMC voltages */ void (*set_pwr)(struct platform_device *pdev, int state); + int (*get_cd)(struct platform_device *pdev); }; #endif /* __SH_MOBILE_SDHI_H__ */ -- cgit v1.2.3
From 777271d0f33da306575ef776c75f66fc27246bf0 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:27:01 +0200 Subject: mmc: Allow the platform to specify the sh_mmcif get_cd handler On some platforms (e.g. AP4EVB) the card detect pin of a slot is not directly connected to the sh_mmcif controller, so polling needs to be used. To overcome the overhead induced by querying the controller on each poll cycle, card detection can be handled in the platform code more efficiently. This patch exposes a get_cd hook for that purpose.
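The platform-side wiring mirrors the sdhi case above (sketch; the names and GPIO macro are invented):

	/* Sketch: let sh_mmcif poll a board GPIO instead of its registers. */
	static int board_mmcif_get_cd(struct platform_device *pdev)
	{
		return !gpio_get_value(BOARD_MMCIF_CD_GPIO); /* active low */
	}

	static struct sh_mmcif_plat_data mmcif_data = {
		.caps	= MMC_CAP_NEEDS_POLL,
		.get_cd	= board_mmcif_get_cd,
	};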
Signed-off-by: Arnd Hannemann Tested-by: Yusuke Goda Signed-off-by: Samuel Ortiz --- drivers/mmc/host/sh_mmcif.c | 12 ++++++++++++ include/linux/mmc/sh_mmcif.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 0f06b8002814..ddd09840520b 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -710,9 +710,21 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->bus_width = ios->bus_width; } +static int sh_mmcif_get_cd(struct mmc_host *mmc) +{ + struct sh_mmcif_host *host = mmc_priv(mmc); + struct sh_mmcif_plat_data *p = host->pd->dev.platform_data; + + if (!p->get_cd) + return -ENOSYS; + else + return p->get_cd(host->pd); +} + static struct mmc_host_ops sh_mmcif_ops = { .request = sh_mmcif_request, .set_ios = sh_mmcif_set_ios, + .get_cd = sh_mmcif_get_cd, }; static void sh_mmcif_detect(struct mmc_host *mmc) diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index d4a2ebbdab4b..d19e2114fd86 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -34,6 +34,7 @@ struct sh_mmcif_plat_data { void (*set_pwr)(struct platform_device *pdev, int state); void (*down_pwr)(struct platform_device *pdev); + int (*get_cd)(struct platform_device *pdef); u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ unsigned long caps; u32 ocr; -- cgit v1.2.3 From f1334fb3c3006ba109886158c0ad79512f928bc1 Mon Sep 17 00:00:00 2001 From: Yusuke Goda Date: Mon, 30 Aug 2010 11:50:19 +0100 Subject: mmc: Allow 2 byte requests in 4-bit mode for tmio_mmc Adjust the tmio_mmc block size check to accept 2-byte requests in 4-bit mode if the hardware supports it. Tested with the SDHI hardware block included in sh7724. Signed-off-by: Yusuke Goda Signed-off-by: Matt Fleming Acked-by: Magnus Damm Tested-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mfd/sh_mobile_sdhi.c | 6 ++++++ drivers/mmc/host/tmio_mmc.c | 17 ++++++++++++----- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c index 01d83a41d570..f1714f93af9d 100644 --- a/drivers/mfd/sh_mobile_sdhi.c +++ b/drivers/mfd/sh_mobile_sdhi.c @@ -125,6 +125,12 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->capabilities |= p->tmio_caps; } + /* + * All SDHI blocks support 2-byte and larger block sizes in 4-bit + * bus width mode. 
+ */ + mmc_data->flags |= TMIO_MMC_BLKSZ_2BYTES; + if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { priv->param_tx.slave_id = p->dma_slave_tx; priv->param_rx.slave_id = p->dma_slave_rx; diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 1a47221d01a4..e7765a89593e 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -658,14 +658,21 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host) static int tmio_mmc_start_data(struct tmio_mmc_host *host, struct mmc_data *data) { + struct mfd_cell *cell = host->pdev->dev.platform_data; + struct tmio_mmc_data *pdata = cell->driver_data; + pr_debug("setup data transfer: blocksize %08x nr_blocks %d\n", data->blksz, data->blocks); - /* Hardware cannot perform 1 and 2 byte requests in 4 bit mode */ - if (data->blksz < 4 && host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) { - pr_err("%s: %d byte block unsupported in 4 bit mode\n", - mmc_hostname(host->mmc), data->blksz); - return -EINVAL; + /* Some hardware cannot perform 2 byte requests in 4 bit mode */ + if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) { + int blksz_2bytes = pdata->flags & TMIO_MMC_BLKSZ_2BYTES; + + if (data->blksz < 2 || (data->blksz < 4 && !blksz_2bytes)) { + pr_err("%s: %d byte block unsupported in 4 bit mode\n", + mmc_hostname(host->mmc), data->blksz); + return -EINVAL; + } } tmio_mmc_init_sg(host, data); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 24c43bbad541..085f041197dc 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -52,6 +52,11 @@ /* tmio MMC platform flags */ #define TMIO_MMC_WRPROTECT_DISABLE (1 << 0) +/* + * Some controllers can support a 2-byte block size when the bus width + * is configured in 4-bit mode. + */ +#define TMIO_MMC_BLKSZ_2BYTES (1 << 1) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); -- cgit v1.2.3
From 47c1697508f2ec9f6b31ce6c825fe1017871dea6 Mon Sep 17 00:00:00 2001 From: Mattias Wallin Date: Fri, 10 Sep 2010 17:47:56 +0200 Subject: mfd: Align ab8500 with the abx500 interface This patch makes the ab8500 mixed signal chip expose the same interface for register access as the ab3100, ab3550 and ab5500 chips. ab8500_read() and ab8500_write() are removed and replaced with abx500_get_register_interruptible() and abx500_set_register_interruptible(). Signed-off-by: Mattias Wallin Acked-by: Linus Walleij Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/ab8500-core.c | 281 ++++++++++++++++++++++++++------------------- drivers/regulator/ab8500.c | 86 ++++++++------ drivers/rtc/rtc-ab8500.c | 103 +++++++++-------- include/linux/mfd/ab8500.h | 28 ++++- include/linux/mfd/abx500.h | 3 +- 6 files changed, 296 insertions(+), 209 deletions(-) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8a463e6c22a0..ec0af47a9058 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -434,7 +434,7 @@ config PCF50633_GPIO config ABX500_CORE bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" - default y if ARCH_U300 + default y if ARCH_U300 || ARCH_U8500 help Say yes here if you have the ABX500 Mixed Signal IC family chips. This core driver expose register access functions.
@@ -475,7 +475,7 @@ config EZX_PCAP config AB8500_CORE bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" - depends on SPI=y && GENERIC_HARDIRQS + depends on SPI=y && GENERIC_HARDIRQS && ABX500_CORE select MFD_CORE help Select this option to enable access to AB8500 power management diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index b8c4b80e4c43..373783146b5e 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -4,6 +4,7 @@ * License Terms: GNU General Public License v2 * Author: Srinidhi Kasagar * Author: Rabin Vincent + * Changes: Mattias Wallin */ #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include @@ -22,71 +24,71 @@ * Interrupt register offsets * Bank : 0x0E */ -#define AB8500_IT_SOURCE1_REG 0x0E00 -#define AB8500_IT_SOURCE2_REG 0x0E01 -#define AB8500_IT_SOURCE3_REG 0x0E02 -#define AB8500_IT_SOURCE4_REG 0x0E03 -#define AB8500_IT_SOURCE5_REG 0x0E04 -#define AB8500_IT_SOURCE6_REG 0x0E05 -#define AB8500_IT_SOURCE7_REG 0x0E06 -#define AB8500_IT_SOURCE8_REG 0x0E07 -#define AB8500_IT_SOURCE19_REG 0x0E12 -#define AB8500_IT_SOURCE20_REG 0x0E13 -#define AB8500_IT_SOURCE21_REG 0x0E14 -#define AB8500_IT_SOURCE22_REG 0x0E15 -#define AB8500_IT_SOURCE23_REG 0x0E16 -#define AB8500_IT_SOURCE24_REG 0x0E17 +#define AB8500_IT_SOURCE1_REG 0x00 +#define AB8500_IT_SOURCE2_REG 0x01 +#define AB8500_IT_SOURCE3_REG 0x02 +#define AB8500_IT_SOURCE4_REG 0x03 +#define AB8500_IT_SOURCE5_REG 0x04 +#define AB8500_IT_SOURCE6_REG 0x05 +#define AB8500_IT_SOURCE7_REG 0x06 +#define AB8500_IT_SOURCE8_REG 0x07 +#define AB8500_IT_SOURCE19_REG 0x12 +#define AB8500_IT_SOURCE20_REG 0x13 +#define AB8500_IT_SOURCE21_REG 0x14 +#define AB8500_IT_SOURCE22_REG 0x15 +#define AB8500_IT_SOURCE23_REG 0x16 +#define AB8500_IT_SOURCE24_REG 0x17 /* * latch registers */ -#define AB8500_IT_LATCH1_REG 0x0E20 -#define AB8500_IT_LATCH2_REG 0x0E21 -#define AB8500_IT_LATCH3_REG 0x0E22 -#define AB8500_IT_LATCH4_REG 0x0E23 -#define AB8500_IT_LATCH5_REG 0x0E24 -#define AB8500_IT_LATCH6_REG 0x0E25 -#define AB8500_IT_LATCH7_REG 0x0E26 -#define AB8500_IT_LATCH8_REG 0x0E27 -#define AB8500_IT_LATCH9_REG 0x0E28 -#define AB8500_IT_LATCH10_REG 0x0E29 -#define AB8500_IT_LATCH19_REG 0x0E32 -#define AB8500_IT_LATCH20_REG 0x0E33 -#define AB8500_IT_LATCH21_REG 0x0E34 -#define AB8500_IT_LATCH22_REG 0x0E35 -#define AB8500_IT_LATCH23_REG 0x0E36 -#define AB8500_IT_LATCH24_REG 0x0E37 +#define AB8500_IT_LATCH1_REG 0x20 +#define AB8500_IT_LATCH2_REG 0x21 +#define AB8500_IT_LATCH3_REG 0x22 +#define AB8500_IT_LATCH4_REG 0x23 +#define AB8500_IT_LATCH5_REG 0x24 +#define AB8500_IT_LATCH6_REG 0x25 +#define AB8500_IT_LATCH7_REG 0x26 +#define AB8500_IT_LATCH8_REG 0x27 +#define AB8500_IT_LATCH9_REG 0x28 +#define AB8500_IT_LATCH10_REG 0x29 +#define AB8500_IT_LATCH19_REG 0x32 +#define AB8500_IT_LATCH20_REG 0x33 +#define AB8500_IT_LATCH21_REG 0x34 +#define AB8500_IT_LATCH22_REG 0x35 +#define AB8500_IT_LATCH23_REG 0x36 +#define AB8500_IT_LATCH24_REG 0x37 /* * mask registers */ -#define AB8500_IT_MASK1_REG 0x0E40 -#define AB8500_IT_MASK2_REG 0x0E41 -#define AB8500_IT_MASK3_REG 0x0E42 -#define AB8500_IT_MASK4_REG 0x0E43 -#define AB8500_IT_MASK5_REG 0x0E44 -#define AB8500_IT_MASK6_REG 0x0E45 -#define AB8500_IT_MASK7_REG 0x0E46 -#define AB8500_IT_MASK8_REG 0x0E47 -#define AB8500_IT_MASK9_REG 0x0E48 -#define AB8500_IT_MASK10_REG 0x0E49 -#define AB8500_IT_MASK11_REG 0x0E4A -#define AB8500_IT_MASK12_REG 0x0E4B -#define AB8500_IT_MASK13_REG 0x0E4C -#define AB8500_IT_MASK14_REG 0x0E4D -#define 
AB8500_IT_MASK15_REG 0x0E4E -#define AB8500_IT_MASK16_REG 0x0E4F -#define AB8500_IT_MASK17_REG 0x0E50 -#define AB8500_IT_MASK18_REG 0x0E51 -#define AB8500_IT_MASK19_REG 0x0E52 -#define AB8500_IT_MASK20_REG 0x0E53 -#define AB8500_IT_MASK21_REG 0x0E54 -#define AB8500_IT_MASK22_REG 0x0E55 -#define AB8500_IT_MASK23_REG 0x0E56 -#define AB8500_IT_MASK24_REG 0x0E57 - -#define AB8500_REV_REG 0x1080 +#define AB8500_IT_MASK1_REG 0x40 +#define AB8500_IT_MASK2_REG 0x41 +#define AB8500_IT_MASK3_REG 0x42 +#define AB8500_IT_MASK4_REG 0x43 +#define AB8500_IT_MASK5_REG 0x44 +#define AB8500_IT_MASK6_REG 0x45 +#define AB8500_IT_MASK7_REG 0x46 +#define AB8500_IT_MASK8_REG 0x47 +#define AB8500_IT_MASK9_REG 0x48 +#define AB8500_IT_MASK10_REG 0x49 +#define AB8500_IT_MASK11_REG 0x4A +#define AB8500_IT_MASK12_REG 0x4B +#define AB8500_IT_MASK13_REG 0x4C +#define AB8500_IT_MASK14_REG 0x4D +#define AB8500_IT_MASK15_REG 0x4E +#define AB8500_IT_MASK16_REG 0x4F +#define AB8500_IT_MASK17_REG 0x50 +#define AB8500_IT_MASK18_REG 0x51 +#define AB8500_IT_MASK19_REG 0x52 +#define AB8500_IT_MASK20_REG 0x53 +#define AB8500_IT_MASK21_REG 0x54 +#define AB8500_IT_MASK22_REG 0x55 +#define AB8500_IT_MASK23_REG 0x56 +#define AB8500_IT_MASK24_REG 0x57 + +#define AB8500_REV_REG 0x80 /* * Map interrupt numbers to the LATCH and MASK register offsets, Interrupt @@ -99,96 +101,132 @@ static const int ab8500_irq_regoffset[AB8500_NUM_IRQ_REGS] = { 0, 1, 2, 3, 4, 6, 7, 8, 9, 18, 19, 20, 21, }; -static int __ab8500_write(struct ab8500 *ab8500, u16 addr, u8 data) +static int ab8500_get_chip_id(struct device *dev) +{ + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); + return (int)ab8500->chip_id; +} + +static int set_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 data) { int ret; + /* + * Put the u8 bank and u8 register together into a an u16. + * The bank on higher 8 bits and register in lower 8 bits. + * */ + u16 addr = ((u16)bank) << 8 | reg; dev_vdbg(ab8500->dev, "wr: addr %#x <= %#x\n", addr, data); + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; + ret = ab8500->write(ab8500, addr, data); if (ret < 0) dev_err(ab8500->dev, "failed to write reg %#x: %d\n", addr, ret); + mutex_unlock(&ab8500->lock); return ret; } -/** - * ab8500_write() - write an AB8500 register - * @ab8500: device to write to - * @addr: address of the register - * @data: value to write - */ -int ab8500_write(struct ab8500 *ab8500, u16 addr, u8 data) +static int ab8500_set_register(struct device *dev, u8 bank, + u8 reg, u8 value) { - int ret; - - mutex_lock(&ab8500->lock); - ret = __ab8500_write(ab8500, addr, data); - mutex_unlock(&ab8500->lock); + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return ret; + return set_register_interruptible(ab8500, bank, reg, value); } -EXPORT_SYMBOL_GPL(ab8500_write); -static int __ab8500_read(struct ab8500 *ab8500, u16 addr) +static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 *value) { int ret; + /* put the u8 bank and u8 reg together into a an u16. 
+ * bank on higher 8 bits and reg in lower */ + u16 addr = ((u16)bank) << 8 | reg; + + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; ret = ab8500->read(ab8500, addr); if (ret < 0) dev_err(ab8500->dev, "failed to read reg %#x: %d\n", addr, ret); + else + *value = ret; + mutex_unlock(&ab8500->lock); dev_vdbg(ab8500->dev, "rd: addr %#x => data %#x\n", addr, ret); return ret; } -/** - * ab8500_read() - read an AB8500 register - * @ab8500: device to read from - * @addr: address of the register - */ -int ab8500_read(struct ab8500 *ab8500, u16 addr) +static int ab8500_get_register(struct device *dev, u8 bank, + u8 reg, u8 *value) { - int ret; - - mutex_lock(&ab8500->lock); - ret = __ab8500_read(ab8500, addr); - mutex_unlock(&ab8500->lock); + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return ret; + return get_register_interruptible(ab8500, bank, reg, value); } -EXPORT_SYMBOL_GPL(ab8500_read); - -/** - * ab8500_set_bits() - set a bitfield in an AB8500 register - * @ab8500: device to read from - * @addr: address of the register - * @mask: mask of the bitfield to modify - * @data: value to set to the bitfield - */ -int ab8500_set_bits(struct ab8500 *ab8500, u16 addr, u8 mask, u8 data) + +static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 bitmask, u8 bitvalues) { int ret; + u8 data; + /* put the u8 bank and u8 reg together into a an u16. + * bank on higher 8 bits and reg in lower */ + u16 addr = ((u16)bank) << 8 | reg; - mutex_lock(&ab8500->lock); + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; - ret = __ab8500_read(ab8500, addr); - if (ret < 0) + ret = ab8500->read(ab8500, addr); + if (ret < 0) { + dev_err(ab8500->dev, "failed to read reg %#x: %d\n", + addr, ret); goto out; + } - ret &= ~mask; - ret |= data; + data = (u8)ret; + data = (~bitmask & data) | (bitmask & bitvalues); - ret = __ab8500_write(ab8500, addr, ret); + ret = ab8500->write(ab8500, addr, data); + if (ret < 0) + dev_err(ab8500->dev, "failed to write reg %#x: %d\n", + addr, ret); + dev_vdbg(ab8500->dev, "mask: addr %#x => data %#x\n", addr, data); out: mutex_unlock(&ab8500->lock); return ret; } -EXPORT_SYMBOL_GPL(ab8500_set_bits); + +static int ab8500_mask_and_set_register(struct device *dev, + u8 bank, u8 reg, u8 bitmask, u8 bitvalues) +{ + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); + + return mask_and_set_register_interruptible(ab8500, bank, reg, + bitmask, bitvalues); + +} + +static struct abx500_ops ab8500_ops = { + .get_chip_id = ab8500_get_chip_id, + .get_register = ab8500_get_register, + .set_register = ab8500_set_register, + .get_register_page = NULL, + .set_register_page = NULL, + .mask_and_set_register = ab8500_mask_and_set_register, + .event_registers_startup_state_get = NULL, + .startup_irq_enabled = NULL, +}; static void ab8500_irq_lock(unsigned int irq) { @@ -213,7 +251,7 @@ static void ab8500_irq_sync_unlock(unsigned int irq) ab8500->oldmask[i] = new; reg = AB8500_IT_MASK1_REG + ab8500_irq_regoffset[i]; - ab8500_write(ab8500, reg, new); + set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new); } mutex_unlock(&ab8500->irq_lock); @@ -257,9 +295,11 @@ static irqreturn_t ab8500_irq(int irq, void *dev) for (i = 0; i < AB8500_NUM_IRQ_REGS; i++) { int regoffset = ab8500_irq_regoffset[i]; int status; + u8 value; - status = ab8500_read(ab8500, AB8500_IT_LATCH1_REG + regoffset); - if (status <= 0) + status = get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + regoffset, &value); + 
if (status < 0 || value == 0) continue; do { @@ -267,8 +307,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev) int line = i * 8 + bit; handle_nested_irq(ab8500->irq_base + line); - status &= ~(1 << bit); - } while (status); + value &= ~(1 << bit); + } while (value); } return IRQ_HANDLED; @@ -381,6 +421,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500) struct ab8500_platform_data *plat = dev_get_platdata(ab8500->dev); int ret; int i; + u8 value; if (plat) ab8500->irq_base = plat->irq_base; @@ -388,7 +429,8 @@ int __devinit ab8500_init(struct ab8500 *ab8500) mutex_init(&ab8500->lock); mutex_init(&ab8500->irq_lock); - ret = ab8500_read(ab8500, AB8500_REV_REG); + ret = get_register_interruptible(ab8500, AB8500_MISC, + AB8500_REV_REG, &value); if (ret < 0) return ret; @@ -397,28 +439,37 @@ int __devinit ab8500_init(struct ab8500 *ab8500) * 0x10 - Cut 1.0 * 0x11 - Cut 1.1 */ - if (ret == 0x0 || ret == 0x10 || ret == 0x11) { - ab8500->revision = ret; - dev_info(ab8500->dev, "detected chip, revision: %#x\n", ret); + if (value == 0x0 || value == 0x10 || value == 0x11) { + ab8500->revision = value; + dev_info(ab8500->dev, "detected chip, revision: %#x\n", value); } else { - dev_err(ab8500->dev, "unknown chip, revision: %#x\n", ret); + dev_err(ab8500->dev, "unknown chip, revision: %#x\n", value); return -EINVAL; } + ab8500->chip_id = value; if (plat && plat->init) plat->init(ab8500); /* Clear and mask all interrupts */ for (i = 0; i < 10; i++) { - ab8500_read(ab8500, AB8500_IT_LATCH1_REG + i); - ab8500_write(ab8500, AB8500_IT_MASK1_REG + i, 0xff); + get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + i, &value); + set_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_MASK1_REG + i, 0xff); } for (i = 18; i < 24; i++) { - ab8500_read(ab8500, AB8500_IT_LATCH1_REG + i); - ab8500_write(ab8500, AB8500_IT_MASK1_REG + i, 0xff); + get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + i, &value); + set_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_MASK1_REG + i, 0xff); } + ret = abx500_register_ops(ab8500->dev, &ab8500_ops); + if (ret) + return ret; + for (i = 0; i < AB8500_NUM_IRQ_REGS; i++) ab8500->mask[i] = ab8500->oldmask[i] = 0xff; diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 28c7ae67cec9..db6b70f20511 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -33,9 +34,11 @@ * @max_uV: maximum voltage (for variable voltage supplies) * @min_uV: minimum voltage (for variable voltage supplies) * @fixed_uV: typical voltage (for fixed voltage supplies) + * @update_bank: bank to control on/off * @update_reg: register to control on/off * @mask: mask to enable/disable regulator * @enable: bits to enable the regulator in normal(high power) mode + * @voltage_bank: bank to control regulator voltage * @voltage_reg: register to control regulator voltage * @voltage_mask: mask to control regulator voltage * @supported_voltages: supported voltage table @@ -49,11 +52,13 @@ struct ab8500_regulator_info { int max_uV; int min_uV; int fixed_uV; - int update_reg; - int mask; - int enable; - int voltage_reg; - int voltage_mask; + u8 update_bank; + u8 update_reg; + u8 mask; + u8 enable; + u8 voltage_bank; + u8 voltage_reg; + u8 voltage_mask; int const *supported_voltages; int voltages_len; }; @@ -97,8 +102,8 @@ static int ab8500_regulator_enable(struct regulator_dev *rdev) if (regulator_id >= AB8500_NUM_REGULATORS) 
return -EINVAL; - ret = ab8500_set_bits(info->ab8500, info->update_reg, - info->mask, info->enable); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, info->mask, info->enable); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set enable bits for regulator\n"); @@ -114,8 +119,8 @@ static int ab8500_regulator_disable(struct regulator_dev *rdev) if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_set_bits(info->ab8500, info->update_reg, - info->mask, 0x0); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, info->mask, 0x0); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set disable bits for regulator\n"); @@ -126,19 +131,21 @@ static int ab8500_regulator_is_enabled(struct regulator_dev *rdev) { int regulator_id, ret; struct ab8500_regulator_info *info = rdev_get_drvdata(rdev); + u8 value; regulator_id = rdev_get_id(rdev); if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_read(info->ab8500, info->update_reg); + ret = abx500_get_register_interruptible(info->dev, + info->update_bank, info->update_reg, &value); if (ret < 0) { dev_err(rdev_get_dev(rdev), "couldn't read 0x%x register\n", info->update_reg); return ret; } - if (ret & info->mask) + if (value & info->mask) return true; else return false; @@ -165,14 +172,16 @@ static int ab8500_list_voltage(struct regulator_dev *rdev, unsigned selector) static int ab8500_regulator_get_voltage(struct regulator_dev *rdev) { - int regulator_id, ret, val; + int regulator_id, ret; struct ab8500_regulator_info *info = rdev_get_drvdata(rdev); + u8 value; regulator_id = rdev_get_id(rdev); if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_read(info->ab8500, info->voltage_reg); + ret = abx500_get_register_interruptible(info->dev, info->voltage_bank, + info->voltage_reg, &value); if (ret < 0) { dev_err(rdev_get_dev(rdev), "couldn't read voltage reg for regulator\n"); @@ -180,11 +189,11 @@ static int ab8500_regulator_get_voltage(struct regulator_dev *rdev) } /* vintcore has a different layout */ - val = ret & info->voltage_mask; + value &= info->voltage_mask; if (regulator_id == AB8500_LDO_INTCORE) - ret = info->supported_voltages[val >> 0x3]; + ret = info->supported_voltages[value >> 0x3]; else - ret = info->supported_voltages[val]; + ret = info->supported_voltages[value]; return ret; } @@ -224,8 +233,9 @@ static int ab8500_regulator_set_voltage(struct regulator_dev *rdev, } /* set the registers for the request */ - ret = ab8500_set_bits(info->ab8500, info->voltage_reg, - info->voltage_mask, ret); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->voltage_bank, info->voltage_reg, + info->voltage_mask, (u8)ret); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set voltage reg for regulator\n"); @@ -262,9 +272,9 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .list_voltage = ab8500_list_voltage, }; -#define AB8500_LDO(_id, min, max, reg, reg_mask, reg_enable, \ - volt_reg, volt_mask, voltages, \ - len_volts) \ +#define AB8500_LDO(_id, min, max, bank, reg, reg_mask, \ + reg_enable, volt_bank, volt_reg, volt_mask, \ + voltages, len_volts) \ { \ .desc = { \ .name = "LDO-" #_id, \ @@ -275,9 +285,11 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { }, \ .min_uV = (min) * 1000, \ .max_uV = (max) * 1000, \ + .update_bank = bank, \ .update_reg = reg, \ .mask = reg_mask, \ .enable = reg_enable, \ + .voltage_bank = volt_bank, \ .voltage_reg = volt_reg, \ 
.voltage_mask = volt_mask, \ .supported_voltages = voltages, \ @@ -285,8 +297,8 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .fixed_uV = 0, \ } -#define AB8500_FIXED_LDO(_id, fixed, reg, reg_mask, \ - reg_enable) \ +#define AB8500_FIXED_LDO(_id, fixed, bank, reg, \ + reg_mask, reg_enable) \ { \ .desc = { \ .name = "LDO-" #_id, \ @@ -296,6 +308,7 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .owner = THIS_MODULE, \ }, \ .fixed_uV = fixed * 1000, \ + .update_bank = bank, \ .update_reg = reg, \ .mask = reg_mask, \ .enable = reg_enable, \ @@ -304,28 +317,29 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { static struct ab8500_regulator_info ab8500_regulator_info[] = { /* * Variable Voltage LDOs - * name, min uV, max uV, ctrl reg, reg mask, enable mask, - * volt ctrl reg, volt ctrl mask, volt table, num supported volts + * name, min uV, max uV, ctrl bank, ctrl reg, reg mask, enable mask, + * volt ctrl bank, volt ctrl reg, volt ctrl mask, volt table, + * num supported volts */ - AB8500_LDO(AUX1, 1100, 3300, 0x0409, 0x3, 0x1, 0x041f, 0xf, + AB8500_LDO(AUX1, 1100, 3300, 0x04, 0x09, 0x3, 0x1, 0x04, 0x1f, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(AUX2, 1100, 3300, 0x0409, 0xc, 0x4, 0x0420, 0xf, + AB8500_LDO(AUX2, 1100, 3300, 0x04, 0x09, 0xc, 0x4, 0x04, 0x20, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(AUX3, 1100, 3300, 0x040a, 0x3, 0x1, 0x0421, 0xf, + AB8500_LDO(AUX3, 1100, 3300, 0x04, 0x0a, 0x3, 0x1, 0x04, 0x21, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(INTCORE, 1100, 3300, 0x0380, 0x4, 0x4, 0x0380, 0x38, + AB8500_LDO(INTCORE, 1100, 3300, 0x03, 0x80, 0x4, 0x4, 0x03, 0x80, 0x38, ldo_vintcore_voltages, ARRAY_SIZE(ldo_vintcore_voltages)), /* * Fixed Voltage LDOs - * name, o/p uV, ctrl reg, enable, disable + * name, o/p uV, ctrl bank, ctrl reg, enable, disable */ - AB8500_FIXED_LDO(TVOUT, 2000, 0x0380, 0x2, 0x2), - AB8500_FIXED_LDO(AUDIO, 2000, 0x0383, 0x2, 0x2), - AB8500_FIXED_LDO(ANAMIC1, 2050, 0x0383, 0x4, 0x4), - AB8500_FIXED_LDO(ANAMIC2, 2050, 0x0383, 0x8, 0x8), - AB8500_FIXED_LDO(DMIC, 1800, 0x0383, 0x10, 0x10), - AB8500_FIXED_LDO(ANA, 1200, 0x0383, 0xc, 0x4), + AB8500_FIXED_LDO(TVOUT, 2000, 0x03, 0x80, 0x2, 0x2), + AB8500_FIXED_LDO(AUDIO, 2000, 0x03, 0x83, 0x2, 0x2), + AB8500_FIXED_LDO(ANAMIC1, 2050, 0x03, 0x83, 0x4, 0x4), + AB8500_FIXED_LDO(ANAMIC2, 2050, 0x03, 0x83, 0x8, 0x8), + AB8500_FIXED_LDO(DMIC, 1800, 0x03, 0x83, 0x10, 0x10), + AB8500_FIXED_LDO(ANA, 1200, 0x03, 0x83, 0xc, 0x4), }; static inline struct ab8500_regulator_info *find_regulator_info(int id) diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 2fda03125e55..e346705aae92 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -14,26 +14,26 @@ #include #include #include +#include #include #include -#define AB8500_RTC_SOFF_STAT_REG 0x0F00 -#define AB8500_RTC_CC_CONF_REG 0x0F01 -#define AB8500_RTC_READ_REQ_REG 0x0F02 -#define AB8500_RTC_WATCH_TSECMID_REG 0x0F03 -#define AB8500_RTC_WATCH_TSECHI_REG 0x0F04 -#define AB8500_RTC_WATCH_TMIN_LOW_REG 0x0F05 -#define AB8500_RTC_WATCH_TMIN_MID_REG 0x0F06 -#define AB8500_RTC_WATCH_TMIN_HI_REG 0x0F07 -#define AB8500_RTC_ALRM_MIN_LOW_REG 0x0F08 -#define AB8500_RTC_ALRM_MIN_MID_REG 0x0F09 -#define AB8500_RTC_ALRM_MIN_HI_REG 0x0F0A -#define AB8500_RTC_STAT_REG 0x0F0B -#define AB8500_RTC_BKUP_CHG_REG 0x0F0C -#define AB8500_RTC_FORCE_BKUP_REG 0x0F0D -#define AB8500_RTC_CALIB_REG 0x0F0E -#define AB8500_RTC_SWITCH_STAT_REG 0x0F0F -#define AB8500_REV_REG 
0x1080 +#define AB8500_RTC_SOFF_STAT_REG 0x00 +#define AB8500_RTC_CC_CONF_REG 0x01 +#define AB8500_RTC_READ_REQ_REG 0x02 +#define AB8500_RTC_WATCH_TSECMID_REG 0x03 +#define AB8500_RTC_WATCH_TSECHI_REG 0x04 +#define AB8500_RTC_WATCH_TMIN_LOW_REG 0x05 +#define AB8500_RTC_WATCH_TMIN_MID_REG 0x06 +#define AB8500_RTC_WATCH_TMIN_HI_REG 0x07 +#define AB8500_RTC_ALRM_MIN_LOW_REG 0x08 +#define AB8500_RTC_ALRM_MIN_MID_REG 0x09 +#define AB8500_RTC_ALRM_MIN_HI_REG 0x0A +#define AB8500_RTC_STAT_REG 0x0B +#define AB8500_RTC_BKUP_CHG_REG 0x0C +#define AB8500_RTC_FORCE_BKUP_REG 0x0D +#define AB8500_RTC_CALIB_REG 0x0E +#define AB8500_RTC_SWITCH_STAT_REG 0x0F /* RtcReadRequest bits */ #define RTC_READ_REQUEST 0x01 @@ -46,13 +46,13 @@ #define COUNTS_PER_SEC (0xF000 / 60) #define AB8500_RTC_EPOCH 2000 -static const unsigned long ab8500_rtc_time_regs[] = { +static const u8 ab8500_rtc_time_regs[] = { AB8500_RTC_WATCH_TMIN_HI_REG, AB8500_RTC_WATCH_TMIN_MID_REG, AB8500_RTC_WATCH_TMIN_LOW_REG, AB8500_RTC_WATCH_TSECHI_REG, AB8500_RTC_WATCH_TSECMID_REG }; -static const unsigned long ab8500_rtc_alarm_regs[] = { +static const u8 ab8500_rtc_alarm_regs[] = { AB8500_RTC_ALRM_MIN_HI_REG, AB8500_RTC_ALRM_MIN_MID_REG, AB8500_RTC_ALRM_MIN_LOW_REG }; @@ -76,29 +76,30 @@ static unsigned long get_elapsed_seconds(int year) static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); unsigned long timeout = jiffies + HZ; int retval, i; unsigned long mins, secs; unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)]; + u8 value; /* Request a data read */ - retval = ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG, - RTC_READ_REQUEST); + retval = abx500_set_register_interruptible(dev, + AB8500_RTC, AB8500_RTC_READ_REQ_REG, RTC_READ_REQUEST); if (retval < 0) return retval; /* Early AB8500 chips will not clear the rtc read request bit */ - if (ab8500->revision == 0) { + if (abx500_get_chip_id(dev) == 0) { msleep(1); } else { /* Wait for some cycles after enabling the rtc read in ab8500 */ while (time_before(jiffies, timeout)) { - retval = ab8500_read(ab8500, AB8500_RTC_READ_REQ_REG); + retval = abx500_get_register_interruptible(dev, + AB8500_RTC, AB8500_RTC_READ_REQ_REG, &value); if (retval < 0) return retval; - if (!(retval & RTC_READ_REQUEST)) + if (!(value & RTC_READ_REQUEST)) break; msleep(1); @@ -107,10 +108,11 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) /* Read the Watchtime registers */ for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) { - retval = ab8500_read(ab8500, ab8500_rtc_time_regs[i]); + retval = abx500_get_register_interruptible(dev, + AB8500_RTC, ab8500_rtc_time_regs[i], &value); if (retval < 0) return retval; - buf[i] = retval; + buf[i] = value; } mins = (buf[0] << 16) | (buf[1] << 8) | buf[2]; @@ -128,7 +130,6 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)]; unsigned long no_secs, no_mins, secs = 0; @@ -162,27 +163,29 @@ static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm) buf[0] = (no_mins >> 16) & 0xFF; for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) { - retval = ab8500_write(ab8500, ab8500_rtc_time_regs[i], buf[i]); + retval = abx500_set_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_time_regs[i], buf[i]); if (retval < 0) return retval; } /* Request a data write 
*/ - return ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG, RTC_WRITE_REQUEST); + return abx500_set_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_READ_REQ_REG, RTC_WRITE_REQUEST); } static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; - int rtc_ctrl; + u8 rtc_ctrl, value; unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)]; unsigned long secs, mins; /* Check if the alarm is enabled or not */ - rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG); - if (rtc_ctrl < 0) - return rtc_ctrl; + retval = abx500_get_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_STAT_REG, &rtc_ctrl); + if (retval < 0) + return retval; if (rtc_ctrl & RTC_ALARM_ENA) alarm->enabled = 1; @@ -192,10 +195,11 @@ static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->pending = 0; for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) { - retval = ab8500_read(ab8500, ab8500_rtc_alarm_regs[i]); + retval = abx500_get_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_alarm_regs[i], &value); if (retval < 0) return retval; - buf[i] = retval; + buf[i] = value; } mins = (buf[0] << 16) | (buf[1] << 8) | (buf[2]); @@ -211,15 +215,13 @@ static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) static int ab8500_rtc_irq_enable(struct device *dev, unsigned int enabled) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - - return ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_ALARM_ENA, - enabled ? RTC_ALARM_ENA : 0); + return abx500_mask_and_set_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_STAT_REG, RTC_ALARM_ENA, + enabled ? RTC_ALARM_ENA : 0); } static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)]; unsigned long mins, secs = 0; @@ -247,7 +249,8 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) /* Set the alarm time */ for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) { - retval = ab8500_write(ab8500, ab8500_rtc_alarm_regs[i], buf[i]); + retval = abx500_set_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_alarm_regs[i], buf[i]); if (retval < 0) return retval; } @@ -276,10 +279,9 @@ static const struct rtc_class_ops ab8500_rtc_ops = { static int __devinit ab8500_rtc_probe(struct platform_device *pdev) { - struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent); int err; struct rtc_device *rtc; - int rtc_ctrl; + u8 rtc_ctrl; int irq; irq = platform_get_irq_byname(pdev, "ALARM"); @@ -287,17 +289,18 @@ static int __devinit ab8500_rtc_probe(struct platform_device *pdev) return irq; /* For RTC supply test */ - err = ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_STATUS_DATA, - RTC_STATUS_DATA); + err = abx500_mask_and_set_register_interruptible(&pdev->dev, AB8500_RTC, + AB8500_RTC_STAT_REG, RTC_STATUS_DATA, RTC_STATUS_DATA); if (err < 0) return err; /* Wait for reset by the PorRtc */ msleep(1); - rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG); - if (rtc_ctrl < 0) - return rtc_ctrl; + err = abx500_get_register_interruptible(&pdev->dev, AB8500_RTC, + AB8500_RTC_STAT_REG, &rtc_ctrl); + if (err < 0) + return err; /* Check if the RTC Supply fails */ if (!(rtc_ctrl & RTC_STATUS_DATA)) { diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h index f5cec4500f38..d63b6050b183 100644 --- a/include/linux/mfd/ab8500.h +++ b/include/linux/mfd/ab8500.h @@ -9,6 
+9,29 @@ #include +/* + * AB8500 bank addresses + */ +#define AB8500_SYS_CTRL1_BLOCK 0x1 +#define AB8500_SYS_CTRL2_BLOCK 0x2 +#define AB8500_REGU_CTRL1 0x3 +#define AB8500_REGU_CTRL2 0x4 +#define AB8500_USB 0x5 +#define AB8500_TVOUT 0x6 +#define AB8500_DBI 0x7 +#define AB8500_ECI_AV_ACC 0x8 +#define AB8500_RESERVED 0x9 +#define AB8500_GPADC 0xA +#define AB8500_CHARGER 0xB +#define AB8500_GAS_GAUGE 0xC +#define AB8500_AUDIO 0xD +#define AB8500_INTERRUPT 0xE +#define AB8500_RTC 0xF +#define AB8500_MISC 0x10 +#define AB8500_DEBUG 0x12 +#define AB8500_PROD_TEST 0x13 +#define AB8500_OTP_EMUL 0x15 + /* * Interrupts */ @@ -99,6 +122,7 @@ struct ab8500 { int revision; int irq_base; int irq; + u8 chip_id; int (*write) (struct ab8500 *a8500, u16 addr, u8 data); int (*read) (struct ab8500 *a8500, u16 addr); @@ -124,10 +148,6 @@ struct ab8500_platform_data { struct regulator_init_data *regulator[AB8500_NUM_REGULATORS]; }; -extern int ab8500_write(struct ab8500 *a8500, u16 addr, u8 data); -extern int ab8500_read(struct ab8500 *a8500, u16 addr); -extern int ab8500_set_bits(struct ab8500 *a8500, u16 addr, u8 mask, u8 data); - extern int __devinit ab8500_init(struct ab8500 *ab8500); extern int __devexit ab8500_exit(struct ab8500 *ab8500); diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 390726fcbcb1..be7373c79bea 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -6,8 +6,7 @@ * * ABX500 core access functions. * The abx500 interface is used for the Analog Baseband chip - * ab3100, ab3550, ab5500 and possibly comming. It is not used for - * ab4500 and ab8500 since they are another family of chip. + * ab3100, ab3550, ab5500, and ab8500. * * Author: Mattias Wallin * Author: Mattias Nilsson -- cgit v1.2.3 From 38b340527aa44bb8d1b88ef1e5a4e26b27695c2b Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 8 Sep 2010 09:44:34 -0400 Subject: mfd: Update chip id of 88pm8607 Chipid of 88pm8607 is 0x40 or 0x50. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 7 +++++-- include/linux/mfd/88pm860x.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 4db10a150369..20895e7a99c9 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -645,10 +645,13 @@ static void __devinit device_8607_init(struct pm860x_chip *chip, dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret); goto out; } - if ((ret & PM8607_VERSION_MASK) == PM8607_VERSION) + switch (ret & PM8607_VERSION_MASK) { + case 0x40: + case 0x50: dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n", ret); - else { + break; + default: dev_err(chip->dev, "Failed to detect Marvell 88PM8607. 
" "Chip ID: %02x\n", ret); goto out; diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index bfd23bef7363..4db1fbd8969e 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -138,7 +138,7 @@ enum { PM8607_ID_RG_MAX, }; -#define PM8607_VERSION (0x40) /* 8607 chip ID */ +/* 8607 chip ID is 0x40 or 0x50 */ #define PM8607_VERSION_MASK (0xF0) /* 8607 chip ID mask */ /* Interrupt Registers */ -- cgit v1.2.3 From c26448c48448266480e1b6c371f897167060ceaf Mon Sep 17 00:00:00 2001 From: Gary King Date: Mon, 20 Sep 2010 00:18:27 +0200 Subject: mfd: Add basic tps6586x interrupt support Add support for enabling and disabling tps6586x subdevice interrupts Signed-off-by: Gary King Acked-by: Mike Rapoport Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/tps6586x.c | 200 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps6586x.h | 31 +++++++ 3 files changed, 233 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 01c928bca099..66779bdc9627 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -563,8 +563,8 @@ config MFD_JZ4740_ADC This driver is necessary for jz4740-battery and jz4740-hwmon driver. config MFD_TPS6586X - tristate "TPS6586x Power Management chips" - depends on I2C && GPIOLIB + bool "TPS6586x Power Management chips" + depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS select MFD_CORE help If you say yes here you get support for the TPS6586X series of diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 2f9336c3710c..117eb7cafe77 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -15,6 +15,8 @@ * published by the Free Software Foundation. */ +#include +#include #include #include #include @@ -29,16 +31,76 @@ #define TPS6586X_GPIOSET1 0x5d #define TPS6586X_GPIOSET2 0x5e +/* interrupt control registers */ +#define TPS6586X_INT_ACK1 0xb5 +#define TPS6586X_INT_ACK2 0xb6 +#define TPS6586X_INT_ACK3 0xb7 +#define TPS6586X_INT_ACK4 0xb8 + +/* interrupt mask registers */ +#define TPS6586X_INT_MASK1 0xb0 +#define TPS6586X_INT_MASK2 0xb1 +#define TPS6586X_INT_MASK3 0xb2 +#define TPS6586X_INT_MASK4 0xb3 +#define TPS6586X_INT_MASK5 0xb4 + /* device id */ #define TPS6586X_VERSIONCRC 0xcd #define TPS658621A_VERSIONCRC 0x15 +struct tps6586x_irq_data { + u8 mask_reg; + u8 mask_mask; +}; + +#define TPS6586X_IRQ(_reg, _mask) \ + { \ + .mask_reg = (_reg) - TPS6586X_INT_MASK1, \ + .mask_mask = (_mask), \ + } + +static const struct tps6586x_irq_data tps6586x_irqs[] = { + [TPS6586X_INT_PLDO_0] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 0), + [TPS6586X_INT_PLDO_1] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 1), + [TPS6586X_INT_PLDO_2] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 2), + [TPS6586X_INT_PLDO_3] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 3), + [TPS6586X_INT_PLDO_4] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 4), + [TPS6586X_INT_PLDO_5] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 5), + [TPS6586X_INT_PLDO_6] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 6), + [TPS6586X_INT_PLDO_7] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 7), + [TPS6586X_INT_COMP_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 1 << 0), + [TPS6586X_INT_ADC] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 1), + [TPS6586X_INT_PLDO_8] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 2), + [TPS6586X_INT_PLDO_9] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 3), + [TPS6586X_INT_PSM_0] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 4), + [TPS6586X_INT_PSM_1] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 5), + [TPS6586X_INT_PSM_2] = 
TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 6), + [TPS6586X_INT_PSM_3] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 7), + [TPS6586X_INT_RTC_ALM1] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 4), + [TPS6586X_INT_ACUSB_OVP] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 0x03), + [TPS6586X_INT_USB_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 2), + [TPS6586X_INT_AC_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 3), + [TPS6586X_INT_BAT_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 1 << 0), + [TPS6586X_INT_CHG_STAT] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 0xfc), + [TPS6586X_INT_CHG_TEMP] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 0x06), + [TPS6586X_INT_PP] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 0xf0), + [TPS6586X_INT_RESUME] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 5), + [TPS6586X_INT_LOW_SYS] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 6), + [TPS6586X_INT_RTC_ALM2] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 1 << 1), +}; + struct tps6586x { struct mutex lock; struct device *dev; struct i2c_client *client; struct gpio_chip gpio; + struct irq_chip irq_chip; + struct mutex irq_lock; + int irq_base; + u32 irq_en; + u8 mask_cache[5]; + u8 mask_reg[5]; }; static inline int __tps6586x_read(struct i2c_client *client, @@ -262,6 +324,129 @@ static int tps6586x_remove_subdevs(struct tps6586x *tps6586x) return device_for_each_child(tps6586x->dev, NULL, __remove_subdev); } +static void tps6586x_irq_lock(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + + mutex_lock(&tps6586x->irq_lock); +} + +static void tps6586x_irq_enable(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + unsigned int __irq = irq - tps6586x->irq_base; + const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; + + tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask; + tps6586x->irq_en |= (1 << __irq); +} + +static void tps6586x_irq_disable(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + + unsigned int __irq = irq - tps6586x->irq_base; + const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; + + tps6586x->mask_reg[data->mask_reg] |= data->mask_mask; + tps6586x->irq_en &= ~(1 << __irq); +} + +static void tps6586x_irq_sync_unlock(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + int i; + + for (i = 0; i < ARRAY_SIZE(tps6586x->mask_reg); i++) { + if (tps6586x->mask_reg[i] != tps6586x->mask_cache[i]) { + if (!WARN_ON(tps6586x_write(tps6586x->dev, + TPS6586X_INT_MASK1 + i, + tps6586x->mask_reg[i]))) + tps6586x->mask_cache[i] = tps6586x->mask_reg[i]; + } + } + + mutex_unlock(&tps6586x->irq_lock); +} + +static irqreturn_t tps6586x_irq(int irq, void *data) +{ + struct tps6586x *tps6586x = data; + u32 acks; + int ret = 0; + + ret = tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, + sizeof(acks), (uint8_t *)&acks); + + if (ret < 0) { + dev_err(tps6586x->dev, "failed to read interrupt status\n"); + return IRQ_NONE; + } + + acks = le32_to_cpu(acks); + + while (acks) { + int i = __ffs(acks); + + if (tps6586x->irq_en & (1 << i)) + handle_nested_irq(tps6586x->irq_base + i); + + acks &= ~(1 << i); + } + + return IRQ_HANDLED; +} + +static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq, + int irq_base) +{ + int i, ret; + u8 tmp[4]; + + if (!irq_base) { + dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n"); + return -EINVAL; + } + + mutex_init(&tps6586x->irq_lock); + for (i = 0; i < 5; i++) { + tps6586x->mask_cache[i] = 0xff; + tps6586x->mask_reg[i] = 0xff; + tps6586x_write(tps6586x->dev, TPS6586X_INT_MASK1 + i, 0xff); + } + + tps6586x_reads(tps6586x->dev, 
TPS6586X_INT_ACK1, sizeof(tmp), tmp); + + tps6586x->irq_base = irq_base; + + tps6586x->irq_chip.name = "tps6586x"; + tps6586x->irq_chip.enable = tps6586x_irq_enable; + tps6586x->irq_chip.disable = tps6586x_irq_disable; + tps6586x->irq_chip.bus_lock = tps6586x_irq_lock; + tps6586x->irq_chip.bus_sync_unlock = tps6586x_irq_sync_unlock; + + for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) { + int __irq = i + tps6586x->irq_base; + set_irq_chip_data(__irq, tps6586x); + set_irq_chip_and_handler(__irq, &tps6586x->irq_chip, + handle_simple_irq); + set_irq_nested_thread(__irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(__irq, IRQF_VALID); +#endif + } + + ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT, + "tps6586x", tps6586x); + + if (!ret) { + device_init_wakeup(tps6586x->dev, 1); + enable_irq_wake(irq); + } + + return ret; +} + static int __devinit tps6586x_add_subdevs(struct tps6586x *tps6586x, struct tps6586x_platform_data *pdata) { @@ -327,6 +512,15 @@ static int __devinit tps6586x_i2c_probe(struct i2c_client *client, mutex_init(&tps6586x->lock); + if (client->irq) { + ret = tps6586x_irq_init(tps6586x, client->irq, + pdata->irq_base); + if (ret) { + dev_err(&client->dev, "IRQ init failed: %d\n", ret); + goto err_irq_init; + } + } + ret = tps6586x_add_subdevs(tps6586x, pdata); if (ret) { dev_err(&client->dev, "add devices failed: %d\n", ret); @@ -338,6 +532,9 @@ static int __devinit tps6586x_i2c_probe(struct i2c_client *client, return 0; err_add_devs: + if (client->irq) + free_irq(client->irq, tps6586x); +err_irq_init: kfree(tps6586x); return ret; } @@ -348,6 +545,9 @@ static int __devexit tps6586x_i2c_remove(struct i2c_client *client) struct tps6586x_platform_data *pdata = client->dev.platform_data; int ret; + if (client->irq) + free_irq(client->irq, tps6586x); + if (pdata->gpio_base) { ret = gpiochip_remove(&tps6586x->gpio); if (ret) diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h index 772b3ae640af..b6bab1b04e25 100644 --- a/include/linux/mfd/tps6586x.h +++ b/include/linux/mfd/tps6586x.h @@ -18,6 +18,36 @@ enum { TPS6586X_ID_LDO_RTC, }; +enum { + TPS6586X_INT_PLDO_0, + TPS6586X_INT_PLDO_1, + TPS6586X_INT_PLDO_2, + TPS6586X_INT_PLDO_3, + TPS6586X_INT_PLDO_4, + TPS6586X_INT_PLDO_5, + TPS6586X_INT_PLDO_6, + TPS6586X_INT_PLDO_7, + TPS6586X_INT_COMP_DET, + TPS6586X_INT_ADC, + TPS6586X_INT_PLDO_8, + TPS6586X_INT_PLDO_9, + TPS6586X_INT_PSM_0, + TPS6586X_INT_PSM_1, + TPS6586X_INT_PSM_2, + TPS6586X_INT_PSM_3, + TPS6586X_INT_RTC_ALM1, + TPS6586X_INT_ACUSB_OVP, + TPS6586X_INT_USB_DET, + TPS6586X_INT_AC_DET, + TPS6586X_INT_BAT_DET, + TPS6586X_INT_CHG_STAT, + TPS6586X_INT_CHG_TEMP, + TPS6586X_INT_PP, + TPS6586X_INT_RESUME, + TPS6586X_INT_LOW_SYS, + TPS6586X_INT_RTC_ALM2, +}; + struct tps6586x_subdev_info { int id; const char *name; @@ -29,6 +59,7 @@ struct tps6586x_platform_data { struct tps6586x_subdev_info *subdevs; int gpio_base; + int irq_base; }; /* -- cgit v1.2.3 From c6252e9ce7f51a2af66bd69c93afb37191467c96 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Sep 2010 14:58:30 +0100 Subject: mfd: Declare abx500_remove_ops() Otherwise sparse warns about a public symbol with no declaration and the compiler can't spot if the callers and users have different signatures for the function. 
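A minimal illustration of what the shared declaration buys (the conflicting
definition below is hypothetical, not taken from this patch): once the
prototype

	void abx500_remove_ops(struct device *dev);

lives in include/linux/mfd/abx500.h and every definer and caller includes
that header, a divergent signature such as

	void abx500_remove_ops(int id) { }	/* error: conflicting types */

is rejected at compile time instead of being silently accepted inside its
own translation unit, and sparse stops warning about a public symbol with
no declaration.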
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/abx500.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index be7373c79bea..67bd6f7ecf32 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -229,4 +229,5 @@ struct abx500_ops { }; int abx500_register_ops(struct device *core_dev, struct abx500_ops *ops); +void abx500_remove_ops(struct device *dev); #endif -- cgit v1.2.3 From 5f2545fa156f3d4d327038d7664608e146809a3c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 30 Sep 2010 21:55:36 +0100 Subject: mfd: Allow for bypass of cell resource conflict check The upcoming VIA VX855 MFD driver needs to communicate resources to subdevices where the resources may be claimed by ACPI. Add a flag to mfd_cell to request that resources are not policed. Signed-off-by: Daniel Drake Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 8 +++++--- include/linux/mfd/core.h | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 1823a57b7d8f..d1c8605d4ed4 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -65,9 +65,11 @@ static int mfd_add_device(struct device *parent, int id, res[r].end = cell->resources[r].end; } - ret = acpi_check_resource_conflict(res); - if (ret) - goto fail_res; + if (!cell->ignore_resource_conflicts) { + ret = acpi_check_resource_conflict(res); + if (ret) + goto fail_res; + } } ret = platform_device_add_resources(pdev, res, cell->num_resources); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 11d740b8831d..cb93d80aa642 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -44,6 +44,9 @@ struct mfd_cell { */ int num_resources; const struct resource *resources; + + /* don't check for resource conflicts */ + bool ignore_resource_conflicts; }; extern int mfd_add_devices(struct device *parent, int id, -- cgit v1.2.3 From b4e017e332b873133602f47ae8cacfae64ab82c5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 28 Sep 2010 16:38:41 +0200 Subject: mfd: Remove deprecated mc13783 functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last user is gone since v2.6.34-rc1~40 Signed-off-by: Uwe Kleine-König Signed-off-by: Samuel Ortiz --- include/linux/mfd/mc13783.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 0fa44fb8dd26..5f6aff55eeb7 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -35,24 +35,6 @@ int mc13783_irq_status(struct mc13783 *mc13783, int irq, int *enabled, int *pending); int mc13783_irq_ack(struct mc13783 *mc13783, int irq); -static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_mask(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_mask(mc13783, irq); -} - -static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_unmask(mc13783, irq); -} - -static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_ack(mc13783, irq); -} - #define MC13783_ADC0 43 #define MC13783_ADC0_ADREFEN (1 << 10) #define MC13783_ADC0_ADREFMODE (1 << 11) 
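For reference, any remaining caller of the deleted wrappers migrates with a
mechanical rename; a sketch, using MC13783_IRQ_ADCDONE purely as an example
interrupt from this driver:

	/* before: deprecated one-line wrappers removed above */
	mc13783_mask(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_ackirq(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_unmask(mc13783, MC13783_IRQ_ADCDONE);

	/* after: call the mc13783_irq_* functions directly */
	mc13783_irq_mask(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_irq_unmask(mc13783, MC13783_IRQ_ADCDONE);

The wrappers were plain aliases for these calls, so behaviour is unchanged.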
-- cgit v1.2.3 From 8e00593557c3c5a7bc6f636412a1cadcf4624232 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 28 Sep 2010 16:37:20 +0200 Subject: mfd: Add mc13892 support to mc13xxx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc13892 is the companion PMIC for Freescale's i.MX51. It's similar enough to mc13782 to support it in a single driver. This patch introduces enough compatibility cruft to keep all users of the superseded mc13783 driver unchanged. Signed-off-by: Uwe Kleine-König Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 +- drivers/mfd/Makefile | 2 +- drivers/mfd/mc13783-core.c | 743 --------------------------------------- drivers/mfd/mc13xxx-core.c | 840 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/mc13783.h | 247 ++++++------- include/linux/mfd/mc13xxx.h | 154 ++++++++ 6 files changed, 1113 insertions(+), 882 deletions(-) delete mode 100644 drivers/mfd/mc13783-core.c create mode 100644 drivers/mfd/mc13xxx-core.c create mode 100644 include/linux/mfd/mc13xxx.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9735f581574d..6c6b9f02d177 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -409,11 +409,16 @@ config MFD_PCF50633 so that function-specific drivers can bind to them. config MFD_MC13783 - tristate "Support Freescale MC13783" + tristate + +config MFD_MC13XXX + tristate "Support Freescale MC13783 and MC13892" depends on SPI_MASTER select MFD_CORE + select MFD_MC13783 help - Support for the Freescale (Atlas) MC13783 PMIC and audio CODEC. + Support for the Freescale (Atlas) PMIC and audio CODECs + MC13783 and MC13892. This driver provides common support for accessing the device, additional drivers must be enabled in order to use the functionality of the device. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d18a6ab2ab58..70b26999dc81 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o obj-$(CONFIG_TWL4030_CODEC) += twl4030-codec.o obj-$(CONFIG_TWL6030_PWM) += twl6030-pwm.o -obj-$(CONFIG_MFD_MC13783) += mc13783-core.o +obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o obj-$(CONFIG_MFD_CORE) += mfd-core.o diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c deleted file mode 100644 index 2506e6888507..000000000000 --- a/drivers/mfd/mc13783-core.c +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Copyright 2009 Pengutronix - * Uwe Kleine-Koenig - * - * loosely based on an earlier driver that has - * Copyright 2009 Pengutronix, Sascha Hauer - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -struct mc13783 { - struct spi_device *spidev; - struct mutex lock; - int irq; - int adcflags; - - irq_handler_t irqhandler[MC13783_NUM_IRQ]; - void *irqdata[MC13783_NUM_IRQ]; -}; - -#define MC13783_REG_REVISION 7 -#define MC13783_REG_ADC_0 43 -#define MC13783_REG_ADC_1 44 -#define MC13783_REG_ADC_2 45 - -#define MC13783_IRQSTAT0 0 -#define MC13783_IRQSTAT0_ADCDONEI (1 << 0) -#define MC13783_IRQSTAT0_ADCBISDONEI (1 << 1) -#define MC13783_IRQSTAT0_TSI (1 << 2) -#define MC13783_IRQSTAT0_WHIGHI (1 << 3) -#define MC13783_IRQSTAT0_WLOWI (1 << 4) -#define MC13783_IRQSTAT0_CHGDETI (1 << 6) -#define MC13783_IRQSTAT0_CHGOVI (1 << 7) -#define MC13783_IRQSTAT0_CHGREVI (1 << 8) -#define MC13783_IRQSTAT0_CHGSHORTI (1 << 9) -#define MC13783_IRQSTAT0_CCCVI (1 << 10) -#define MC13783_IRQSTAT0_CHGCURRI (1 << 11) -#define MC13783_IRQSTAT0_BPONI (1 << 12) -#define MC13783_IRQSTAT0_LOBATLI (1 << 13) -#define MC13783_IRQSTAT0_LOBATHI (1 << 14) -#define MC13783_IRQSTAT0_UDPI (1 << 15) -#define MC13783_IRQSTAT0_USBI (1 << 16) -#define MC13783_IRQSTAT0_IDI (1 << 19) -#define MC13783_IRQSTAT0_SE1I (1 << 21) -#define MC13783_IRQSTAT0_CKDETI (1 << 22) -#define MC13783_IRQSTAT0_UDMI (1 << 23) - -#define MC13783_IRQMASK0 1 -#define MC13783_IRQMASK0_ADCDONEM MC13783_IRQSTAT0_ADCDONEI -#define MC13783_IRQMASK0_ADCBISDONEM MC13783_IRQSTAT0_ADCBISDONEI -#define MC13783_IRQMASK0_TSM MC13783_IRQSTAT0_TSI -#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI -#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI -#define MC13783_IRQMASK0_CHGDETM MC13783_IRQSTAT0_CHGDETI -#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI -#define MC13783_IRQMASK0_CHGREVM MC13783_IRQSTAT0_CHGREVI -#define MC13783_IRQMASK0_CHGSHORTM MC13783_IRQSTAT0_CHGSHORTI -#define MC13783_IRQMASK0_CCCVM MC13783_IRQSTAT0_CCCVI -#define MC13783_IRQMASK0_CHGCURRM MC13783_IRQSTAT0_CHGCURRI -#define MC13783_IRQMASK0_BPONM MC13783_IRQSTAT0_BPONI -#define MC13783_IRQMASK0_LOBATLM MC13783_IRQSTAT0_LOBATLI -#define MC13783_IRQMASK0_LOBATHM MC13783_IRQSTAT0_LOBATHI -#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI -#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI -#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI -#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I -#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI -#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI - -#define MC13783_IRQSTAT1 3 -#define MC13783_IRQSTAT1_1HZI (1 << 0) -#define MC13783_IRQSTAT1_TODAI (1 << 1) -#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) -#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) -#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) -#define MC13783_IRQSTAT1_SYSRSTI (1 << 6) -#define MC13783_IRQSTAT1_RTCRSTI (1 << 7) -#define MC13783_IRQSTAT1_PCI (1 << 8) -#define MC13783_IRQSTAT1_WARMI (1 << 9) -#define MC13783_IRQSTAT1_MEMHLDI (1 << 10) -#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) -#define MC13783_IRQSTAT1_THWARNLI (1 << 12) -#define MC13783_IRQSTAT1_THWARNHI (1 << 13) -#define MC13783_IRQSTAT1_CLKI (1 << 14) -#define MC13783_IRQSTAT1_SEMAFI (1 << 15) -#define MC13783_IRQSTAT1_MC2BI (1 << 17) -#define MC13783_IRQSTAT1_HSDETI (1 << 18) -#define MC13783_IRQSTAT1_HSLI (1 << 19) -#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) -#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) - -#define MC13783_IRQMASK1 4 -#define MC13783_IRQMASK1_1HZM MC13783_IRQSTAT1_1HZI -#define MC13783_IRQMASK1_TODAM MC13783_IRQSTAT1_TODAI -#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I -#define MC13783_IRQMASK1_ONOFD2M 
MC13783_IRQSTAT1_ONOFD2I -#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I -#define MC13783_IRQMASK1_SYSRSTM MC13783_IRQSTAT1_SYSRSTI -#define MC13783_IRQMASK1_RTCRSTM MC13783_IRQSTAT1_RTCRSTI -#define MC13783_IRQMASK1_PCM MC13783_IRQSTAT1_PCI -#define MC13783_IRQMASK1_WARMM MC13783_IRQSTAT1_WARMI -#define MC13783_IRQMASK1_MEMHLDM MC13783_IRQSTAT1_MEMHLDI -#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI -#define MC13783_IRQMASK1_THWARNLM MC13783_IRQSTAT1_THWARNLI -#define MC13783_IRQMASK1_THWARNHM MC13783_IRQSTAT1_THWARNHI -#define MC13783_IRQMASK1_CLKM MC13783_IRQSTAT1_CLKI -#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI -#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI -#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI -#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI -#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI -#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI - -#define MC13783_ADC1 44 -#define MC13783_ADC1_ADEN (1 << 0) -#define MC13783_ADC1_RAND (1 << 1) -#define MC13783_ADC1_ADSEL (1 << 3) -#define MC13783_ADC1_ASC (1 << 20) -#define MC13783_ADC1_ADTRIGIGN (1 << 21) - -#define MC13783_NUMREGS 0x3f - -void mc13783_lock(struct mc13783 *mc13783) -{ - if (!mutex_trylock(&mc13783->lock)) { - dev_dbg(&mc13783->spidev->dev, "wait for %s from %pf\n", - __func__, __builtin_return_address(0)); - - mutex_lock(&mc13783->lock); - } - dev_dbg(&mc13783->spidev->dev, "%s from %pf\n", - __func__, __builtin_return_address(0)); -} -EXPORT_SYMBOL(mc13783_lock); - -void mc13783_unlock(struct mc13783 *mc13783) -{ - dev_dbg(&mc13783->spidev->dev, "%s from %pf\n", - __func__, __builtin_return_address(0)); - mutex_unlock(&mc13783->lock); -} -EXPORT_SYMBOL(mc13783_unlock); - -#define MC13783_REGOFFSET_SHIFT 25 -int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val) -{ - struct spi_transfer t; - struct spi_message m; - int ret; - - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - if (offset > MC13783_NUMREGS) - return -EINVAL; - - *val = offset << MC13783_REGOFFSET_SHIFT; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = val; - t.rx_buf = val; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13783->spidev, &m); - - /* error in message.status implies error return from spi_sync */ - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - *val &= 0xffffff; - - dev_vdbg(&mc13783->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val); - - return 0; -} -EXPORT_SYMBOL(mc13783_reg_read); - -int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val) -{ - u32 buf; - struct spi_transfer t; - struct spi_message m; - int ret; - - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - dev_vdbg(&mc13783->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); - - if (offset > MC13783_NUMREGS || val > 0xffffff) - return -EINVAL; - - buf = 1 << 31 | offset << MC13783_REGOFFSET_SHIFT | val; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = &buf; - t.rx_buf = &buf; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13783->spidev, &m); - - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - return 0; -} -EXPORT_SYMBOL(mc13783_reg_write); - -int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset, - u32 mask, u32 val) -{ - int ret; - u32 valread; - - BUG_ON(val & ~mask); - - ret = mc13783_reg_read(mc13783, offset, &valread); - if (ret) - return ret; - - valread = (valread & ~mask) | val; - - return mc13783_reg_write(mc13783, offset, 
valread); -} -EXPORT_SYMBOL(mc13783_reg_rmw); - -int mc13783_get_flags(struct mc13783 *mc13783) -{ - struct mc13783_platform_data *pdata = - dev_get_platdata(&mc13783->spidev->dev); - - return pdata->flags; -} -EXPORT_SYMBOL(mc13783_get_flags); - -int mc13783_irq_mask(struct mc13783 *mc13783, int irq) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - if (mask & irqbit) - /* already masked */ - return 0; - - return mc13783_reg_write(mc13783, offmask, mask | irqbit); -} -EXPORT_SYMBOL(mc13783_irq_mask); - -int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - if (!(mask & irqbit)) - /* already unmasked */ - return 0; - - return mc13783_reg_write(mc13783, offmask, mask & ~irqbit); -} -EXPORT_SYMBOL(mc13783_irq_unmask); - -int mc13783_irq_status(struct mc13783 *mc13783, int irq, - int *enabled, int *pending) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - if (enabled) { - u32 mask; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - *enabled = mask & irqbit; - } - - if (pending) { - u32 stat; - - ret = mc13783_reg_read(mc13783, offstat, &stat); - if (ret) - return ret; - - *pending = stat & irqbit; - } - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_status); - -int mc13783_irq_ack(struct mc13783 *mc13783, int irq) -{ - unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; - unsigned int val = 1 << (irq < 24 ? 
irq : irq - 24); - - BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ); - - return mc13783_reg_write(mc13783, offstat, val); -} -EXPORT_SYMBOL(mc13783_irq_ack); - -int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - BUG_ON(!mutex_is_locked(&mc13783->lock)); - BUG_ON(!handler); - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - if (mc13783->irqhandler[irq]) - return -EBUSY; - - mc13783->irqhandler[irq] = handler; - mc13783->irqdata[irq] = dev; - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_request_nounmask); - -int mc13783_irq_request(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - int ret; - - ret = mc13783_irq_request_nounmask(mc13783, irq, handler, name, dev); - if (ret) - return ret; - - ret = mc13783_irq_unmask(mc13783, irq); - if (ret) { - mc13783->irqhandler[irq] = NULL; - mc13783->irqdata[irq] = NULL; - return ret; - } - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_request); - -int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev) -{ - int ret; - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - if (irq < 0 || irq >= MC13783_NUM_IRQ || !mc13783->irqhandler[irq] || - mc13783->irqdata[irq] != dev) - return -EINVAL; - - ret = mc13783_irq_mask(mc13783, irq); - if (ret) - return ret; - - mc13783->irqhandler[irq] = NULL; - mc13783->irqdata[irq] = NULL; - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_free); - -static inline irqreturn_t mc13783_irqhandler(struct mc13783 *mc13783, int irq) -{ - return mc13783->irqhandler[irq](irq, mc13783->irqdata[irq]); -} - -/* - * returns: number of handled irqs or negative error - * locking: holds mc13783->lock - */ -static int mc13783_irq_handle(struct mc13783 *mc13783, - unsigned int offstat, unsigned int offmask, int baseirq) -{ - u32 stat, mask; - int ret = mc13783_reg_read(mc13783, offstat, &stat); - int num_handled = 0; - - if (ret) - return ret; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - while (stat & ~mask) { - int irq = __ffs(stat & ~mask); - - stat &= ~(1 << irq); - - if (likely(mc13783->irqhandler[baseirq + irq])) { - irqreturn_t handled; - - handled = mc13783_irqhandler(mc13783, baseirq + irq); - if (handled == IRQ_HANDLED) - num_handled++; - } else { - dev_err(&mc13783->spidev->dev, - "BUG: irq %u but no handler\n", - baseirq + irq); - - mask |= 1 << irq; - - ret = mc13783_reg_write(mc13783, offmask, mask); - } - } - - return num_handled; -} - -static irqreturn_t mc13783_irq_thread(int irq, void *data) -{ - struct mc13783 *mc13783 = data; - irqreturn_t ret; - int handled = 0; - - mc13783_lock(mc13783); - - ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT0, - MC13783_IRQMASK0, MC13783_IRQ_ADCDONE); - if (ret > 0) - handled = 1; - - ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT1, - MC13783_IRQMASK1, MC13783_IRQ_1HZ); - if (ret > 0) - handled = 1; - - mc13783_unlock(mc13783); - - return IRQ_RETVAL(handled); -} - -#define MC13783_ADC1_CHAN0_SHIFT 5 -#define MC13783_ADC1_CHAN1_SHIFT 8 - -struct mc13783_adcdone_data { - struct mc13783 *mc13783; - struct completion done; -}; - -static irqreturn_t mc13783_handler_adcdone(int irq, void *data) -{ - struct mc13783_adcdone_data *adcdone_data = data; - - mc13783_irq_ack(adcdone_data->mc13783, irq); - - complete_all(&adcdone_data->done); - - return IRQ_HANDLED; -} - -#define MC13783_ADC_WORKING (1 << 0) - -int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, - unsigned int channel, unsigned int *sample) -{ - u32 adc0, 
adc1, old_adc0; - int i, ret; - struct mc13783_adcdone_data adcdone_data = { - .mc13783 = mc13783, - }; - init_completion(&adcdone_data.done); - - dev_dbg(&mc13783->spidev->dev, "%s\n", __func__); - - mc13783_lock(mc13783); - - if (mc13783->adcflags & MC13783_ADC_WORKING) { - ret = -EBUSY; - goto out; - } - - mc13783->adcflags |= MC13783_ADC_WORKING; - - mc13783_reg_read(mc13783, MC13783_ADC0, &old_adc0); - - adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2; - adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC; - - if (channel > 7) - adc1 |= MC13783_ADC1_ADSEL; - - switch (mode) { - case MC13783_ADC_MODE_TS: - adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_TSMOD0 | - MC13783_ADC0_TSMOD1; - adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; - break; - - case MC13783_ADC_MODE_SINGLE_CHAN: - adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; - adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT; - adc1 |= MC13783_ADC1_RAND; - break; - - case MC13783_ADC_MODE_MULT_CHAN: - adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; - adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; - break; - - default: - mc13783_unlock(mc13783); - return -EINVAL; - } - - dev_dbg(&mc13783->spidev->dev, "%s: request irq\n", __func__); - mc13783_irq_request(mc13783, MC13783_IRQ_ADCDONE, - mc13783_handler_adcdone, __func__, &adcdone_data); - mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE); - - mc13783_reg_write(mc13783, MC13783_REG_ADC_0, adc0); - mc13783_reg_write(mc13783, MC13783_REG_ADC_1, adc1); - - mc13783_unlock(mc13783); - - ret = wait_for_completion_interruptible_timeout(&adcdone_data.done, HZ); - - if (!ret) - ret = -ETIMEDOUT; - - mc13783_lock(mc13783); - - mc13783_irq_free(mc13783, MC13783_IRQ_ADCDONE, &adcdone_data); - - if (ret > 0) - for (i = 0; i < 4; ++i) { - ret = mc13783_reg_read(mc13783, - MC13783_REG_ADC_2, &sample[i]); - if (ret) - break; - } - - if (mode == MC13783_ADC_MODE_TS) - /* restore TSMOD */ - mc13783_reg_write(mc13783, MC13783_REG_ADC_0, old_adc0); - - mc13783->adcflags &= ~MC13783_ADC_WORKING; -out: - mc13783_unlock(mc13783); - - return ret; -} -EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion); - -static int mc13783_add_subdevice_pdata(struct mc13783 *mc13783, - const char *name, void *pdata, size_t pdata_size) -{ - struct mfd_cell cell = { - .name = name, - .platform_data = pdata, - .data_size = pdata_size, - }; - - return mfd_add_devices(&mc13783->spidev->dev, -1, &cell, 1, NULL, 0); -} - -static int mc13783_add_subdevice(struct mc13783 *mc13783, const char *name) -{ - return mc13783_add_subdevice_pdata(mc13783, name, NULL, 0); -} - -static int mc13783_check_revision(struct mc13783 *mc13783) -{ - u32 rev_id, rev1, rev2, finid, icid; - - mc13783_reg_read(mc13783, MC13783_REG_REVISION, &rev_id); - - rev1 = (rev_id & 0x018) >> 3; - rev2 = (rev_id & 0x007); - icid = (rev_id & 0x01C0) >> 6; - finid = (rev_id & 0x01E00) >> 9; - - /* Ver 0.2 is actually 3.2a. 
Report as 3.2 */ - if ((rev1 == 0) && (rev2 == 2)) - rev1 = 3; - - if (rev1 == 0 || icid != 2) { - dev_err(&mc13783->spidev->dev, "No MC13783 detected.\n"); - return -ENODEV; - } - - dev_info(&mc13783->spidev->dev, - "MC13783 Rev %d.%d FinVer %x detected\n", - rev1, rev2, finid); - - return 0; -} - -static int mc13783_probe(struct spi_device *spi) -{ - struct mc13783 *mc13783; - struct mc13783_platform_data *pdata = dev_get_platdata(&spi->dev); - int ret; - - mc13783 = kzalloc(sizeof(*mc13783), GFP_KERNEL); - if (!mc13783) - return -ENOMEM; - - dev_set_drvdata(&spi->dev, mc13783); - spi->mode = SPI_MODE_0 | SPI_CS_HIGH; - spi->bits_per_word = 32; - spi_setup(spi); - - mc13783->spidev = spi; - - mutex_init(&mc13783->lock); - mc13783_lock(mc13783); - - ret = mc13783_check_revision(mc13783); - if (ret) - goto err_revision; - - /* mask all irqs */ - ret = mc13783_reg_write(mc13783, MC13783_IRQMASK0, 0x00ffffff); - if (ret) - goto err_mask; - - ret = mc13783_reg_write(mc13783, MC13783_IRQMASK1, 0x00ffffff); - if (ret) - goto err_mask; - - ret = request_threaded_irq(spi->irq, NULL, mc13783_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13783", mc13783); - - if (ret) { -err_mask: -err_revision: - mutex_unlock(&mc13783->lock); - dev_set_drvdata(&spi->dev, NULL); - kfree(mc13783); - return ret; - } - - mc13783_unlock(mc13783); - - if (pdata->flags & MC13783_USE_ADC) - mc13783_add_subdevice(mc13783, "mc13783-adc"); - - if (pdata->flags & MC13783_USE_CODEC) - mc13783_add_subdevice(mc13783, "mc13783-codec"); - - if (pdata->flags & MC13783_USE_REGULATOR) { - struct mc13783_regulator_platform_data regulator_pdata = { - .num_regulators = pdata->num_regulators, - .regulators = pdata->regulators, - }; - - mc13783_add_subdevice_pdata(mc13783, "mc13783-regulator", - ®ulator_pdata, sizeof(regulator_pdata)); - } - - if (pdata->flags & MC13783_USE_RTC) - mc13783_add_subdevice(mc13783, "mc13783-rtc"); - - if (pdata->flags & MC13783_USE_TOUCHSCREEN) - mc13783_add_subdevice(mc13783, "mc13783-ts"); - - if (pdata->flags & MC13783_USE_LED) - mc13783_add_subdevice_pdata(mc13783, "mc13783-led", - pdata->leds, sizeof(*pdata->leds)); - - return 0; -} - -static int __devexit mc13783_remove(struct spi_device *spi) -{ - struct mc13783 *mc13783 = dev_get_drvdata(&spi->dev); - - free_irq(mc13783->spidev->irq, mc13783); - - mfd_remove_devices(&spi->dev); - - return 0; -} - -static struct spi_driver mc13783_driver = { - .driver = { - .name = "mc13783", - .bus = &spi_bus_type, - .owner = THIS_MODULE, - }, - .probe = mc13783_probe, - .remove = __devexit_p(mc13783_remove), -}; - -static int __init mc13783_init(void) -{ - return spi_register_driver(&mc13783_driver); -} -subsys_initcall(mc13783_init); - -static void __exit mc13783_exit(void) -{ - spi_unregister_driver(&mc13783_driver); -} -module_exit(mc13783_exit); - -MODULE_DESCRIPTION("Core driver for Freescale MC13783 PMIC"); -MODULE_AUTHOR("Uwe Kleine-Koenig "); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c new file mode 100644 index 000000000000..93258adf8b63 --- /dev/null +++ b/drivers/mfd/mc13xxx-core.c @@ -0,0 +1,840 @@ +/* + * Copyright 2009-2010 Pengutronix + * Uwe Kleine-Koenig + * + * loosely based on an earlier driver that has + * Copyright 2009 Pengutronix, Sascha Hauer + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. 
+ */ +#define DEBUG +#define VERBOSE_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +struct mc13xxx { + struct spi_device *spidev; + struct mutex lock; + int irq; + + irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; + void *irqdata[MC13XXX_NUM_IRQ]; +}; + +struct mc13783 { + struct mc13xxx mc13xxx; + + int adcflags; +}; + +struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783) +{ + return &mc13783->mc13xxx; +} +EXPORT_SYMBOL(mc13783_to_mc13xxx); + +#define MC13XXX_IRQSTAT0 0 +#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) +#define MC13XXX_IRQSTAT0_ADCBISDONEI (1 << 1) +#define MC13XXX_IRQSTAT0_TSI (1 << 2) +#define MC13783_IRQSTAT0_WHIGHI (1 << 3) +#define MC13783_IRQSTAT0_WLOWI (1 << 4) +#define MC13XXX_IRQSTAT0_CHGDETI (1 << 6) +#define MC13783_IRQSTAT0_CHGOVI (1 << 7) +#define MC13XXX_IRQSTAT0_CHGREVI (1 << 8) +#define MC13XXX_IRQSTAT0_CHGSHORTI (1 << 9) +#define MC13XXX_IRQSTAT0_CCCVI (1 << 10) +#define MC13XXX_IRQSTAT0_CHGCURRI (1 << 11) +#define MC13XXX_IRQSTAT0_BPONI (1 << 12) +#define MC13XXX_IRQSTAT0_LOBATLI (1 << 13) +#define MC13XXX_IRQSTAT0_LOBATHI (1 << 14) +#define MC13783_IRQSTAT0_UDPI (1 << 15) +#define MC13783_IRQSTAT0_USBI (1 << 16) +#define MC13783_IRQSTAT0_IDI (1 << 19) +#define MC13783_IRQSTAT0_SE1I (1 << 21) +#define MC13783_IRQSTAT0_CKDETI (1 << 22) +#define MC13783_IRQSTAT0_UDMI (1 << 23) + +#define MC13XXX_IRQMASK0 1 +#define MC13XXX_IRQMASK0_ADCDONEM MC13XXX_IRQSTAT0_ADCDONEI +#define MC13XXX_IRQMASK0_ADCBISDONEM MC13XXX_IRQSTAT0_ADCBISDONEI +#define MC13XXX_IRQMASK0_TSM MC13XXX_IRQSTAT0_TSI +#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI +#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI +#define MC13XXX_IRQMASK0_CHGDETM MC13XXX_IRQSTAT0_CHGDETI +#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI +#define MC13XXX_IRQMASK0_CHGREVM MC13XXX_IRQSTAT0_CHGREVI +#define MC13XXX_IRQMASK0_CHGSHORTM MC13XXX_IRQSTAT0_CHGSHORTI +#define MC13XXX_IRQMASK0_CCCVM MC13XXX_IRQSTAT0_CCCVI +#define MC13XXX_IRQMASK0_CHGCURRM MC13XXX_IRQSTAT0_CHGCURRI +#define MC13XXX_IRQMASK0_BPONM MC13XXX_IRQSTAT0_BPONI +#define MC13XXX_IRQMASK0_LOBATLM MC13XXX_IRQSTAT0_LOBATLI +#define MC13XXX_IRQMASK0_LOBATHM MC13XXX_IRQSTAT0_LOBATHI +#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI +#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI +#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI +#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I +#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI +#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI + +#define MC13XXX_IRQSTAT1 3 +#define MC13XXX_IRQSTAT1_1HZI (1 << 0) +#define MC13XXX_IRQSTAT1_TODAI (1 << 1) +#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) +#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) +#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) +#define MC13XXX_IRQSTAT1_SYSRSTI (1 << 6) +#define MC13XXX_IRQSTAT1_RTCRSTI (1 << 7) +#define MC13XXX_IRQSTAT1_PCI (1 << 8) +#define MC13XXX_IRQSTAT1_WARMI (1 << 9) +#define MC13XXX_IRQSTAT1_MEMHLDI (1 << 10) +#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) +#define MC13XXX_IRQSTAT1_THWARNLI (1 << 12) +#define MC13XXX_IRQSTAT1_THWARNHI (1 << 13) +#define MC13XXX_IRQSTAT1_CLKI (1 << 14) +#define MC13783_IRQSTAT1_SEMAFI (1 << 15) +#define MC13783_IRQSTAT1_MC2BI (1 << 17) +#define MC13783_IRQSTAT1_HSDETI (1 << 18) +#define MC13783_IRQSTAT1_HSLI (1 << 19) +#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) +#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) + +#define MC13XXX_IRQMASK1 4 +#define MC13XXX_IRQMASK1_1HZM MC13XXX_IRQSTAT1_1HZI +#define MC13XXX_IRQMASK1_TODAM 
MC13XXX_IRQSTAT1_TODAI +#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I +#define MC13783_IRQMASK1_ONOFD2M MC13783_IRQSTAT1_ONOFD2I +#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I +#define MC13XXX_IRQMASK1_SYSRSTM MC13XXX_IRQSTAT1_SYSRSTI +#define MC13XXX_IRQMASK1_RTCRSTM MC13XXX_IRQSTAT1_RTCRSTI +#define MC13XXX_IRQMASK1_PCM MC13XXX_IRQSTAT1_PCI +#define MC13XXX_IRQMASK1_WARMM MC13XXX_IRQSTAT1_WARMI +#define MC13XXX_IRQMASK1_MEMHLDM MC13XXX_IRQSTAT1_MEMHLDI +#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI +#define MC13XXX_IRQMASK1_THWARNLM MC13XXX_IRQSTAT1_THWARNLI +#define MC13XXX_IRQMASK1_THWARNHM MC13XXX_IRQSTAT1_THWARNHI +#define MC13XXX_IRQMASK1_CLKM MC13XXX_IRQSTAT1_CLKI +#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI +#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI +#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI +#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI +#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI +#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI + +#define MC13XXX_REVISION 7 +#define MC13XXX_REVISION_REVMETAL (0x07 << 0) +#define MC13XXX_REVISION_REVFULL (0x03 << 3) +#define MC13XXX_REVISION_ICID (0x07 << 6) +#define MC13XXX_REVISION_FIN (0x03 << 9) +#define MC13XXX_REVISION_FAB (0x03 << 11) +#define MC13XXX_REVISION_ICIDCODE (0x3f << 13) + +#define MC13783_ADC1 44 +#define MC13783_ADC1_ADEN (1 << 0) +#define MC13783_ADC1_RAND (1 << 1) +#define MC13783_ADC1_ADSEL (1 << 3) +#define MC13783_ADC1_ASC (1 << 20) +#define MC13783_ADC1_ADTRIGIGN (1 << 21) + +#define MC13783_ADC2 45 + +#define MC13XXX_NUMREGS 0x3f + +void mc13xxx_lock(struct mc13xxx *mc13xxx) +{ + if (!mutex_trylock(&mc13xxx->lock)) { + dev_dbg(&mc13xxx->spidev->dev, "wait for %s from %pf\n", + __func__, __builtin_return_address(0)); + + mutex_lock(&mc13xxx->lock); + } + dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + __func__, __builtin_return_address(0)); +} +EXPORT_SYMBOL(mc13xxx_lock); + +void mc13xxx_unlock(struct mc13xxx *mc13xxx) +{ + dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + __func__, __builtin_return_address(0)); + mutex_unlock(&mc13xxx->lock); +} +EXPORT_SYMBOL(mc13xxx_unlock); + +#define MC13XXX_REGOFFSET_SHIFT 25 +int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) +{ + struct spi_transfer t; + struct spi_message m; + int ret; + + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + if (offset > MC13XXX_NUMREGS) + return -EINVAL; + + *val = offset << MC13XXX_REGOFFSET_SHIFT; + + memset(&t, 0, sizeof(t)); + + t.tx_buf = val; + t.rx_buf = val; + t.len = sizeof(u32); + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + ret = spi_sync(mc13xxx->spidev, &m); + + /* error in message.status implies error return from spi_sync */ + BUG_ON(!ret && m.status); + + if (ret) + return ret; + + *val &= 0xffffff; + + dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val); + + return 0; +} +EXPORT_SYMBOL(mc13xxx_reg_read); + +int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) +{ + u32 buf; + struct spi_transfer t; + struct spi_message m; + int ret; + + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); + + if (offset > MC13XXX_NUMREGS || val > 0xffffff) + return -EINVAL; + + buf = 1 << 31 | offset << MC13XXX_REGOFFSET_SHIFT | val; + + memset(&t, 0, sizeof(t)); + + t.tx_buf = &buf; + t.rx_buf = &buf; + t.len = sizeof(u32); + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + ret = 
spi_sync(mc13xxx->spidev, &m); + + BUG_ON(!ret && m.status); + + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_reg_write); + +int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, + u32 mask, u32 val) +{ + int ret; + u32 valread; + + BUG_ON(val & ~mask); + + ret = mc13xxx_reg_read(mc13xxx, offset, &valread); + if (ret) + return ret; + + valread = (valread & ~mask) | val; + + return mc13xxx_reg_write(mc13xxx, offset, valread); +} +EXPORT_SYMBOL(mc13xxx_reg_rmw); + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + u32 mask; + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + if (mask & irqbit) + /* already masked */ + return 0; + + return mc13xxx_reg_write(mc13xxx, offmask, mask | irqbit); +} +EXPORT_SYMBOL(mc13xxx_irq_mask); + +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + u32 mask; + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + if (!(mask & irqbit)) + /* already unmasked */ + return 0; + + return mc13xxx_reg_write(mc13xxx, offmask, mask & ~irqbit); +} +EXPORT_SYMBOL(mc13xxx_irq_unmask); + +int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, + int *enabled, int *pending) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + if (enabled) { + u32 mask; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + *enabled = mask & irqbit; + } + + if (pending) { + u32 stat; + + ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); + if (ret) + return ret; + + *pending = stat & irqbit; + } + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_status); + +int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) +{ + unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; + unsigned int val = 1 << (irq < 24 ? 
irq : irq - 24); + + BUG_ON(irq < 0 || irq >= MC13XXX_NUM_IRQ); + + return mc13xxx_reg_write(mc13xxx, offstat, val); +} +EXPORT_SYMBOL(mc13xxx_irq_ack); + +int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + BUG_ON(!handler); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + if (mc13xxx->irqhandler[irq]) + return -EBUSY; + + mc13xxx->irqhandler[irq] = handler; + mc13xxx->irqdata[irq] = dev; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_request_nounmask); + +int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + int ret; + + ret = mc13xxx_irq_request_nounmask(mc13xxx, irq, handler, name, dev); + if (ret) + return ret; + + ret = mc13xxx_irq_unmask(mc13xxx, irq); + if (ret) { + mc13xxx->irqhandler[irq] = NULL; + mc13xxx->irqdata[irq] = NULL; + return ret; + } + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_request); + +int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev) +{ + int ret; + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ || !mc13xxx->irqhandler[irq] || + mc13xxx->irqdata[irq] != dev) + return -EINVAL; + + ret = mc13xxx_irq_mask(mc13xxx, irq); + if (ret) + return ret; + + mc13xxx->irqhandler[irq] = NULL; + mc13xxx->irqdata[irq] = NULL; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_free); + +static inline irqreturn_t mc13xxx_irqhandler(struct mc13xxx *mc13xxx, int irq) +{ + return mc13xxx->irqhandler[irq](irq, mc13xxx->irqdata[irq]); +} + +/* + * returns: number of handled irqs or negative error + * locking: holds mc13xxx->lock + */ +static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx, + unsigned int offstat, unsigned int offmask, int baseirq) +{ + u32 stat, mask; + int ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); + int num_handled = 0; + + if (ret) + return ret; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + while (stat & ~mask) { + int irq = __ffs(stat & ~mask); + + stat &= ~(1 << irq); + + if (likely(mc13xxx->irqhandler[baseirq + irq])) { + irqreturn_t handled; + + handled = mc13xxx_irqhandler(mc13xxx, baseirq + irq); + if (handled == IRQ_HANDLED) + num_handled++; + } else { + dev_err(&mc13xxx->spidev->dev, + "BUG: irq %u but no handler\n", + baseirq + irq); + + mask |= 1 << irq; + + ret = mc13xxx_reg_write(mc13xxx, offmask, mask); + } + } + + return num_handled; +} + +static irqreturn_t mc13xxx_irq_thread(int irq, void *data) +{ + struct mc13xxx *mc13xxx = data; + irqreturn_t ret; + int handled = 0; + + mc13xxx_lock(mc13xxx); + + ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT0, + MC13XXX_IRQMASK0, 0); + if (ret > 0) + handled = 1; + + ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT1, + MC13XXX_IRQMASK1, 24); + if (ret > 0) + handled = 1; + + mc13xxx_unlock(mc13xxx); + + return IRQ_RETVAL(handled); +} + +enum mc13xxx_id { + MC13XXX_ID_MC13783, + MC13XXX_ID_MC13892, + MC13XXX_ID_INVALID, +}; + +const char *mc13xxx_chipname[] = { + [MC13XXX_ID_MC13783] = "mc13783", + [MC13XXX_ID_MC13892] = "mc13892", +}; + +#define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) +static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) +{ + u32 icid; + u32 revision; + const char *name; + int ret; + + ret = mc13xxx_reg_read(mc13xxx, 46, &icid); + if (ret) + return ret; + + icid = (icid >> 6) & 0x7; + + switch (icid) { + case 2: + *id = MC13XXX_ID_MC13783; + name = "mc13783"; + break; + case 7: + *id 
= MC13XXX_ID_MC13892; + name = "mc13892"; + break; + default: + *id = MC13XXX_ID_INVALID; + break; + } + + if (*id == MC13XXX_ID_MC13783 || *id == MC13XXX_ID_MC13892) { + ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision); + if (ret) + return ret; + + dev_info(&mc13xxx->spidev->dev, "%s: rev: %d.%d, " + "fin: %d, fab: %d, icid: %d/%d\n", + mc13xxx_chipname[*id], + maskval(revision, MC13XXX_REVISION_REVFULL), + maskval(revision, MC13XXX_REVISION_REVMETAL), + maskval(revision, MC13XXX_REVISION_FIN), + maskval(revision, MC13XXX_REVISION_FAB), + maskval(revision, MC13XXX_REVISION_ICID), + maskval(revision, MC13XXX_REVISION_ICIDCODE)); + } + + if (*id != MC13XXX_ID_INVALID) { + const struct spi_device_id *devid = + spi_get_device_id(mc13xxx->spidev); + if (!devid || devid->driver_data != *id) + dev_warn(&mc13xxx->spidev->dev, "device id doesn't " + "match auto detection!\n"); + } + + return 0; +} + +static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx) +{ + const struct spi_device_id *devid = + spi_get_device_id(mc13xxx->spidev); + + if (!devid) + return NULL; + + return mc13xxx_chipname[devid->driver_data]; +} + +#include + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx) +{ + struct mc13xxx_platform_data *pdata = + dev_get_platdata(&mc13xxx->spidev->dev); + + return pdata->flags; +} +EXPORT_SYMBOL(mc13xxx_get_flags); + +#define MC13783_ADC1_CHAN0_SHIFT 5 +#define MC13783_ADC1_CHAN1_SHIFT 8 + +struct mc13xxx_adcdone_data { + struct mc13xxx *mc13xxx; + struct completion done; +}; + +static irqreturn_t mc13783_handler_adcdone(int irq, void *data) +{ + struct mc13xxx_adcdone_data *adcdone_data = data; + + mc13xxx_irq_ack(adcdone_data->mc13xxx, irq); + + complete_all(&adcdone_data->done); + + return IRQ_HANDLED; +} + +#define MC13783_ADC_WORKING (1 << 0) + +int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, + unsigned int channel, unsigned int *sample) +{ + struct mc13xxx *mc13xxx = &mc13783->mc13xxx; + u32 adc0, adc1, old_adc0; + int i, ret; + struct mc13xxx_adcdone_data adcdone_data = { + .mc13xxx = mc13xxx, + }; + init_completion(&adcdone_data.done); + + dev_dbg(&mc13xxx->spidev->dev, "%s\n", __func__); + + mc13xxx_lock(mc13xxx); + + if (mc13783->adcflags & MC13783_ADC_WORKING) { + ret = -EBUSY; + goto out; + } + + mc13783->adcflags |= MC13783_ADC_WORKING; + + mc13xxx_reg_read(mc13xxx, MC13783_ADC0, &old_adc0); + + adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2; + adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC; + + if (channel > 7) + adc1 |= MC13783_ADC1_ADSEL; + + switch (mode) { + case MC13783_ADC_MODE_TS: + adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_TSMOD0 | + MC13783_ADC0_TSMOD1; + adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; + break; + + case MC13783_ADC_MODE_SINGLE_CHAN: + adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; + adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT; + adc1 |= MC13783_ADC1_RAND; + break; + + case MC13783_ADC_MODE_MULT_CHAN: + adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; + adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; + break; + + default: + mc13783_unlock(mc13783); + return -EINVAL; + } + + dev_dbg(&mc13783->mc13xxx.spidev->dev, "%s: request irq\n", __func__); + mc13xxx_irq_request(mc13xxx, MC13783_IRQ_ADCDONE, + mc13783_handler_adcdone, __func__, &adcdone_data); + mc13xxx_irq_ack(mc13xxx, MC13783_IRQ_ADCDONE); + + mc13xxx_reg_write(mc13xxx, MC13783_ADC0, adc0); + mc13xxx_reg_write(mc13xxx, MC13783_ADC1, adc1); + + mc13xxx_unlock(mc13xxx); + + ret = wait_for_completion_interruptible_timeout(&adcdone_data.done, HZ); 
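At this point the conversion has been kicked off and the caller sleeps on the completion with a one-second timeout; for clients of this API the whole handshake collapses into a single call. A minimal usage sketch (hypothetical consumer code, not part of this patch; the function name and the extraction of a 10-bit result from bits 2..11 of the packed ADC2 words are illustrative assumptions):

        static int example_read_channel(struct mc13783 *mc13783,
                        unsigned int channel, unsigned int *raw)
        {
                unsigned int sample[4];
                int ret;

                /* channel selection, IRQ handling and locking all happen
                 * inside mc13783_adc_do_conversion() */
                ret = mc13783_adc_do_conversion(mc13783,
                                MC13783_ADC_MODE_SINGLE_CHAN, channel, sample);
                if (ret)
                        return ret;

                /* each ADC2 word packs two results; take the first one */
                *raw = (sample[0] >> 2) & 0x3ff;
                return 0;
        }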
+ + if (!ret) + ret = -ETIMEDOUT; + + mc13xxx_lock(mc13xxx); + + mc13xxx_irq_free(mc13xxx, MC13783_IRQ_ADCDONE, &adcdone_data); + + if (ret > 0) + for (i = 0; i < 4; ++i) { + ret = mc13xxx_reg_read(mc13xxx, + MC13783_ADC2, &sample[i]); + if (ret) + break; + } + + if (mode == MC13783_ADC_MODE_TS) + /* restore TSMOD */ + mc13xxx_reg_write(mc13xxx, MC13783_ADC0, old_adc0); + + mc13783->adcflags &= ~MC13783_ADC_WORKING; +out: + mc13xxx_unlock(mc13xxx); + + return ret; +} +EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion); + +static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx, + const char *format, void *pdata, size_t pdata_size) +{ + char buf[30]; + const char *name = mc13xxx_get_chipname(mc13xxx); + + struct mfd_cell cell = { + .platform_data = pdata, + .data_size = pdata_size, + }; + + /* there is no asnprintf in the kernel :-( */ + if (snprintf(buf, sizeof(buf), format, name) > sizeof(buf)) + return -E2BIG; + + cell.name = kmemdup(buf, strlen(buf) + 1, GFP_KERNEL); + if (!cell.name) + return -ENOMEM; + + return mfd_add_devices(&mc13xxx->spidev->dev, -1, &cell, 1, NULL, 0); +} + +static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) +{ + return mc13xxx_add_subdevice_pdata(mc13xxx, format, NULL, 0); +} + +static int mc13xxx_probe(struct spi_device *spi) +{ + struct mc13xxx *mc13xxx; + struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); + enum mc13xxx_id id; + int ret; + + mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); + if (!mc13xxx) + return -ENOMEM; + + dev_set_drvdata(&spi->dev, mc13xxx); + spi->mode = SPI_MODE_0 | SPI_CS_HIGH; + spi->bits_per_word = 32; + spi_setup(spi); + + mc13xxx->spidev = spi; + + mutex_init(&mc13xxx->lock); + mc13xxx_lock(mc13xxx); + + ret = mc13xxx_identify(mc13xxx, &id); + if (ret || id == MC13XXX_ID_INVALID) + goto err_revision; + + /* mask all irqs */ + ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff); + if (ret) + goto err_mask; + + ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK1, 0x00ffffff); + if (ret) + goto err_mask; + + ret = request_threaded_irq(spi->irq, NULL, mc13xxx_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); + + if (ret) { +err_mask: +err_revision: + mutex_unlock(&mc13xxx->lock); + dev_set_drvdata(&spi->dev, NULL); + kfree(mc13xxx); + return ret; + } + + mc13xxx_unlock(mc13xxx); + + if (pdata->flags & MC13XXX_USE_ADC) + mc13xxx_add_subdevice(mc13xxx, "%s-adc"); + + if (pdata->flags & MC13XXX_USE_CODEC) + mc13xxx_add_subdevice(mc13xxx, "%s-codec"); + + if (pdata->flags & MC13XXX_USE_REGULATOR) { + struct mc13xxx_regulator_platform_data regulator_pdata = { + .num_regulators = pdata->num_regulators, + .regulators = pdata->regulators, + }; + + mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", + &regulator_pdata, sizeof(regulator_pdata)); + } + + if (pdata->flags & MC13XXX_USE_RTC) + mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); + + if (pdata->flags & MC13XXX_USE_TOUCHSCREEN) + mc13xxx_add_subdevice(mc13xxx, "%s-ts"); + + if (pdata->flags & MC13XXX_USE_LED) { + mc13xxx_add_subdevice_pdata(mc13xxx, "%s-led", + pdata->leds, sizeof(*pdata->leds)); + } + + return 0; +} + +static int __devexit mc13xxx_remove(struct spi_device *spi) +{ + struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); + + free_irq(mc13xxx->spidev->irq, mc13xxx); + + mfd_remove_devices(&spi->dev); + + return 0; +} + +static const struct spi_device_id mc13xxx_device_id[] = { + { + .name = "mc13783", + .driver_data = MC13XXX_ID_MC13783, + }, { + .name = "mc13892", + .driver_data = MC13XXX_ID_MC13892, + }, { +
/* sentinel */ + } +}; + +static struct spi_driver mc13xxx_driver = { + .id_table = mc13xxx_device_id, + .driver = { + .name = "mc13xxx", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + .probe = mc13xxx_probe, + .remove = __devexit_p(mc13xxx_remove), +}; + +static int __init mc13xxx_init(void) +{ + return spi_register_driver(&mc13xxx_driver); +} +subsys_initcall(mc13xxx_init); + +static void __exit mc13xxx_exit(void) +{ + spi_unregister_driver(&mc13xxx_driver); +} +module_exit(mc13xxx_exit); + +MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC"); +MODULE_AUTHOR("Uwe Kleine-Koenig "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 5f6aff55eeb7..b4c741e352c2 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -1,5 +1,5 @@ /* - * Copyright 2009 Pengutronix + * Copyright 2009-2010 Pengutronix * Uwe Kleine-Koenig * * This program is free software; you can redistribute it and/or modify it under @@ -9,31 +9,84 @@ #ifndef __LINUX_MFD_MC13783_H #define __LINUX_MFD_MC13783_H -#include +#include struct mc13783; -void mc13783_lock(struct mc13783 *mc13783); -void mc13783_unlock(struct mc13783 *mc13783); - -int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val); -int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val); -int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset, - u32 mask, u32 val); - -int mc13783_get_flags(struct mc13783 *mc13783); - -int mc13783_irq_request(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev); -int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev); -int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev); - -int mc13783_irq_mask(struct mc13783 *mc13783, int irq); -int mc13783_irq_unmask(struct mc13783 *mc13783, int irq); -int mc13783_irq_status(struct mc13783 *mc13783, int irq, - int *enabled, int *pending); -int mc13783_irq_ack(struct mc13783 *mc13783, int irq); +struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783); + +static inline void mc13783_lock(struct mc13783 *mc13783) +{ + mc13xxx_lock(mc13783_to_mc13xxx(mc13783)); +} + +static inline void mc13783_unlock(struct mc13783 *mc13783) +{ + mc13xxx_unlock(mc13783_to_mc13xxx(mc13783)); +} + +static inline int mc13783_reg_read(struct mc13783 *mc13783, + unsigned int offset, u32 *val) +{ + return mc13xxx_reg_read(mc13783_to_mc13xxx(mc13783), offset, val); +} + +static inline int mc13783_reg_write(struct mc13783 *mc13783, + unsigned int offset, u32 val) +{ + return mc13xxx_reg_write(mc13783_to_mc13xxx(mc13783), offset, val); +} + +static inline int mc13783_reg_rmw(struct mc13783 *mc13783, + unsigned int offset, u32 mask, u32 val) +{ + return mc13xxx_reg_rmw(mc13783_to_mc13xxx(mc13783), offset, mask, val); +} + +static inline int mc13783_get_flags(struct mc13783 *mc13783) +{ + return mc13xxx_get_flags(mc13783_to_mc13xxx(mc13783)); +} + +static inline int mc13783_irq_request(struct mc13783 *mc13783, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + return mc13xxx_irq_request(mc13783_to_mc13xxx(mc13783), irq, + handler, name, dev); +} + +static inline int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + return mc13xxx_irq_request_nounmask(mc13783_to_mc13xxx(mc13783), irq, + handler, name, dev); +} + +static inline int mc13783_irq_free(struct mc13783 *mc13783, int irq, 
void *dev) +{ + return mc13xxx_irq_free(mc13783_to_mc13xxx(mc13783), irq, dev); +} + +static inline int mc13783_irq_mask(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_mask(mc13783_to_mc13xxx(mc13783), irq); +} + +static inline int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_unmask(mc13783_to_mc13xxx(mc13783), irq); +} +static inline int mc13783_irq_status(struct mc13783 *mc13783, int irq, + int *enabled, int *pending) +{ + return mc13xxx_irq_status(mc13783_to_mc13xxx(mc13783), + irq, enabled, pending); +} + +static inline int mc13783_irq_ack(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_ack(mc13783_to_mc13xxx(mc13783), irq); +} #define MC13783_ADC0 43 #define MC13783_ADC0_ADREFEN (1 << 10) @@ -48,96 +101,18 @@ int mc13783_irq_ack(struct mc13783 *mc13783, int irq); MC13783_ADC0_TSMOD1 | \ MC13783_ADC0_TSMOD2) -struct mc13783_led_platform_data { -#define MC13783_LED_MD 0 -#define MC13783_LED_AD 1 -#define MC13783_LED_KP 2 -#define MC13783_LED_R1 3 -#define MC13783_LED_G1 4 -#define MC13783_LED_B1 5 -#define MC13783_LED_R2 6 -#define MC13783_LED_G2 7 -#define MC13783_LED_B2 8 -#define MC13783_LED_R3 9 -#define MC13783_LED_G3 10 -#define MC13783_LED_B3 11 -#define MC13783_LED_MAX MC13783_LED_B3 - int id; - const char *name; - const char *default_trigger; - -/* Three or two bits current selection depending on the led */ - char max_current; -}; - -struct mc13783_leds_platform_data { - int num_leds; - struct mc13783_led_platform_data *led; - -#define MC13783_LED_TRIODE_MD (1 << 0) -#define MC13783_LED_TRIODE_AD (1 << 1) -#define MC13783_LED_TRIODE_KP (1 << 2) -#define MC13783_LED_BOOST_EN (1 << 3) -#define MC13783_LED_TC1HALF (1 << 4) -#define MC13783_LED_SLEWLIMTC (1 << 5) -#define MC13783_LED_SLEWLIMBL (1 << 6) -#define MC13783_LED_TRIODE_TC1 (1 << 7) -#define MC13783_LED_TRIODE_TC2 (1 << 8) -#define MC13783_LED_TRIODE_TC3 (1 << 9) - int flags; - -#define MC13783_LED_AB_DISABLED 0 -#define MC13783_LED_AB_MD1 1 -#define MC13783_LED_AB_MD12 2 -#define MC13783_LED_AB_MD123 3 -#define MC13783_LED_AB_MD1234 4 -#define MC13783_LED_AB_MD1234_AD1 5 -#define MC13783_LED_AB_MD1234_AD12 6 -#define MC13783_LED_AB_MD1_AD 7 - char abmode; - -#define MC13783_LED_ABREF_200MV 0 -#define MC13783_LED_ABREF_400MV 1 -#define MC13783_LED_ABREF_600MV 2 -#define MC13783_LED_ABREF_800MV 3 - char abref; - -#define MC13783_LED_PERIOD_10MS 0 -#define MC13783_LED_PERIOD_100MS 1 -#define MC13783_LED_PERIOD_500MS 2 -#define MC13783_LED_PERIOD_2S 3 - char bl_period; - char tc1_period; - char tc2_period; - char tc3_period; -}; - -/* to be cleaned up */ -struct regulator_init_data; - -struct mc13783_regulator_init_data { - int id; - struct regulator_init_data *init_data; -}; - -struct mc13783_regulator_platform_data { - int num_regulators; - struct mc13783_regulator_init_data *regulators; -}; - -struct mc13783_platform_data { - int num_regulators; - struct mc13783_regulator_init_data *regulators; - struct mc13783_leds_platform_data *leds; - -#define MC13783_USE_TOUCHSCREEN (1 << 0) -#define MC13783_USE_CODEC (1 << 1) -#define MC13783_USE_ADC (1 << 2) -#define MC13783_USE_RTC (1 << 3) -#define MC13783_USE_REGULATOR (1 << 4) -#define MC13783_USE_LED (1 << 5) - unsigned int flags; -}; +#define mc13783_regulator_init_data mc13xxx_regulator_init_data +#define mc13783_regulator_platform_data mc13xxx_regulator_platform_data +#define mc13783_led_platform_data mc13xxx_led_platform_data +#define mc13783_leds_platform_data mc13xxx_leds_platform_data + +#define mc13783_platform_data 
mc13xxx_platform_data +#define MC13783_USE_TOUCHSCREEN MC13XXX_USE_TOUCHSCREEN +#define MC13783_USE_CODEC MC13XXX_USE_CODEC +#define MC13783_USE_ADC MC13XXX_USE_ADC +#define MC13783_USE_RTC MC13XXX_USE_RTC +#define MC13783_USE_REGULATOR MC13XXX_USE_REGULATOR +#define MC13783_USE_LED MC13XXX_USE_LED #define MC13783_ADC_MODE_TS 1 #define MC13783_ADC_MODE_SINGLE_CHAN 2 @@ -181,46 +156,46 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, #define MC13783_REGU_PWGT1SPI 31 #define MC13783_REGU_PWGT2SPI 32 -#define MC13783_IRQ_ADCDONE 0 -#define MC13783_IRQ_ADCBISDONE 1 -#define MC13783_IRQ_TS 2 +#define MC13783_IRQ_ADCDONE MC13XXX_IRQ_ADCDONE +#define MC13783_IRQ_ADCBISDONE MC13XXX_IRQ_ADCBISDONE +#define MC13783_IRQ_TS MC13XXX_IRQ_TS #define MC13783_IRQ_WHIGH 3 #define MC13783_IRQ_WLOW 4 -#define MC13783_IRQ_CHGDET 6 +#define MC13783_IRQ_CHGDET MC13XXX_IRQ_CHGDET #define MC13783_IRQ_CHGOV 7 -#define MC13783_IRQ_CHGREV 8 -#define MC13783_IRQ_CHGSHORT 9 -#define MC13783_IRQ_CCCV 10 -#define MC13783_IRQ_CHGCURR 11 -#define MC13783_IRQ_BPON 12 -#define MC13783_IRQ_LOBATL 13 -#define MC13783_IRQ_LOBATH 14 +#define MC13783_IRQ_CHGREV MC13XXX_IRQ_CHGREV +#define MC13783_IRQ_CHGSHORT MC13XXX_IRQ_CHGSHORT +#define MC13783_IRQ_CCCV MC13XXX_IRQ_CCCV +#define MC13783_IRQ_CHGCURR MC13XXX_IRQ_CHGCURR +#define MC13783_IRQ_BPON MC13XXX_IRQ_BPON +#define MC13783_IRQ_LOBATL MC13XXX_IRQ_LOBATL +#define MC13783_IRQ_LOBATH MC13XXX_IRQ_LOBATH #define MC13783_IRQ_UDP 15 #define MC13783_IRQ_USB 16 #define MC13783_IRQ_ID 19 #define MC13783_IRQ_SE1 21 #define MC13783_IRQ_CKDET 22 #define MC13783_IRQ_UDM 23 -#define MC13783_IRQ_1HZ 24 -#define MC13783_IRQ_TODA 25 +#define MC13783_IRQ_1HZ MC13XXX_IRQ_1HZ +#define MC13783_IRQ_TODA MC13XXX_IRQ_TODA #define MC13783_IRQ_ONOFD1 27 #define MC13783_IRQ_ONOFD2 28 #define MC13783_IRQ_ONOFD3 29 -#define MC13783_IRQ_SYSRST 30 -#define MC13783_IRQ_RTCRST 31 -#define MC13783_IRQ_PC 32 -#define MC13783_IRQ_WARM 33 -#define MC13783_IRQ_MEMHLD 34 +#define MC13783_IRQ_SYSRST MC13XXX_IRQ_SYSRST +#define MC13783_IRQ_RTCRST MC13XXX_IRQ_RTCRST +#define MC13783_IRQ_PC MC13XXX_IRQ_PC +#define MC13783_IRQ_WARM MC13XXX_IRQ_WARM +#define MC13783_IRQ_MEMHLD MC13XXX_IRQ_MEMHLD #define MC13783_IRQ_PWRRDY 35 -#define MC13783_IRQ_THWARNL 36 -#define MC13783_IRQ_THWARNH 37 -#define MC13783_IRQ_CLK 38 +#define MC13783_IRQ_THWARNL MC13XXX_IRQ_THWARNL +#define MC13783_IRQ_THWARNH MC13XXX_IRQ_THWARNH +#define MC13783_IRQ_CLK MC13XXX_IRQ_CLK #define MC13783_IRQ_SEMAF 39 #define MC13783_IRQ_MC2B 41 #define MC13783_IRQ_HSDET 42 #define MC13783_IRQ_HSL 43 #define MC13783_IRQ_ALSPTH 44 #define MC13783_IRQ_AHSSHORT 45 -#define MC13783_NUM_IRQ 46 +#define MC13783_NUM_IRQ MC13XXX_NUM_IRQ -#endif /* __LINUX_MFD_MC13783_H */ +#endif /* ifndef __LINUX_MFD_MC13783_H */ diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h new file mode 100644 index 000000000000..a1d391b40e68 --- /dev/null +++ b/include/linux/mfd/mc13xxx.h @@ -0,0 +1,154 @@ +/* + * Copyright 2009-2010 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. 
+ */ +#ifndef __LINUX_MFD_MC13XXX_H +#define __LINUX_MFD_MC13XXX_H + +#include + +struct mc13xxx; + +void mc13xxx_lock(struct mc13xxx *mc13xxx); +void mc13xxx_unlock(struct mc13xxx *mc13xxx); + +int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val); +int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val); +int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, + u32 mask, u32 val); + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx); + +int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev); +int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev); +int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev); + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, + int *enabled, int *pending); +int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq); + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx); + +#define MC13XXX_IRQ_ADCDONE 0 +#define MC13XXX_IRQ_ADCBISDONE 1 +#define MC13XXX_IRQ_TS 2 +#define MC13XXX_IRQ_CHGDET 6 +#define MC13XXX_IRQ_CHGREV 8 +#define MC13XXX_IRQ_CHGSHORT 9 +#define MC13XXX_IRQ_CCCV 10 +#define MC13XXX_IRQ_CHGCURR 11 +#define MC13XXX_IRQ_BPON 12 +#define MC13XXX_IRQ_LOBATL 13 +#define MC13XXX_IRQ_LOBATH 14 +#define MC13XXX_IRQ_1HZ 24 +#define MC13XXX_IRQ_TODA 25 +#define MC13XXX_IRQ_SYSRST 30 +#define MC13XXX_IRQ_RTCRST 31 +#define MC13XXX_IRQ_PC 32 +#define MC13XXX_IRQ_WARM 33 +#define MC13XXX_IRQ_MEMHLD 34 +#define MC13XXX_IRQ_THWARNL 36 +#define MC13XXX_IRQ_THWARNH 37 +#define MC13XXX_IRQ_CLK 38 + +#define MC13XXX_NUM_IRQ 46 + +struct regulator_init_data; + +struct mc13xxx_regulator_init_data { + int id; + struct regulator_init_data *init_data; +}; + +struct mc13xxx_regulator_platform_data { + int num_regulators; + struct mc13xxx_regulator_init_data *regulators; +}; + +struct mc13xxx_led_platform_data { +#define MC13783_LED_MD 0 +#define MC13783_LED_AD 1 +#define MC13783_LED_KP 2 +#define MC13783_LED_R1 3 +#define MC13783_LED_G1 4 +#define MC13783_LED_B1 5 +#define MC13783_LED_R2 6 +#define MC13783_LED_G2 7 +#define MC13783_LED_B2 8 +#define MC13783_LED_R3 9 +#define MC13783_LED_G3 10 +#define MC13783_LED_B3 11 +#define MC13783_LED_MAX MC13783_LED_B3 + int id; + const char *name; + const char *default_trigger; + +/* Three or two bits current selection depending on the led */ + char max_current; +}; + +struct mc13xxx_leds_platform_data { + int num_leds; + struct mc13xxx_led_platform_data *led; + +#define MC13783_LED_TRIODE_MD (1 << 0) +#define MC13783_LED_TRIODE_AD (1 << 1) +#define MC13783_LED_TRIODE_KP (1 << 2) +#define MC13783_LED_BOOST_EN (1 << 3) +#define MC13783_LED_TC1HALF (1 << 4) +#define MC13783_LED_SLEWLIMTC (1 << 5) +#define MC13783_LED_SLEWLIMBL (1 << 6) +#define MC13783_LED_TRIODE_TC1 (1 << 7) +#define MC13783_LED_TRIODE_TC2 (1 << 8) +#define MC13783_LED_TRIODE_TC3 (1 << 9) + int flags; + +#define MC13783_LED_AB_DISABLED 0 +#define MC13783_LED_AB_MD1 1 +#define MC13783_LED_AB_MD12 2 +#define MC13783_LED_AB_MD123 3 +#define MC13783_LED_AB_MD1234 4 +#define MC13783_LED_AB_MD1234_AD1 5 +#define MC13783_LED_AB_MD1234_AD12 6 +#define MC13783_LED_AB_MD1_AD 7 + char abmode; + +#define MC13783_LED_ABREF_200MV 0 +#define MC13783_LED_ABREF_400MV 1 +#define MC13783_LED_ABREF_600MV 2 +#define MC13783_LED_ABREF_800MV 3 + char abref; + +#define MC13783_LED_PERIOD_10MS 
0 +#define MC13783_LED_PERIOD_100MS 1 +#define MC13783_LED_PERIOD_500MS 2 +#define MC13783_LED_PERIOD_2S 3 + char bl_period; + char tc1_period; + char tc2_period; + char tc3_period; +}; + +struct mc13xxx_platform_data { +#define MC13XXX_USE_TOUCHSCREEN (1 << 0) +#define MC13XXX_USE_CODEC (1 << 1) +#define MC13XXX_USE_ADC (1 << 2) +#define MC13XXX_USE_RTC (1 << 3) +#define MC13XXX_USE_REGULATOR (1 << 4) +#define MC13XXX_USE_LED (1 << 5) + unsigned int flags; + + int num_regulators; + struct mc13xxx_regulator_init_data *regulators; + struct mc13xxx_leds_platform_data *leds; +}; + +#endif /* ifndef __LINUX_MFD_MC13XXX_H */ -- cgit v1.2.3 From 72f2e2c763edc41f8eead042b6ff933acb0378e2 Mon Sep 17 00:00:00 2001 From: kishore kadiyala Date: Fri, 24 Sep 2010 17:13:20 +0000 Subject: mfd: Adding twl6030 mmc card detect support for MMC1 Adding card detect callback function and card detect configuration function for MMC1 Controller on OMAP4. Card detect configuration function does initial configuration of the MMC Control & PullUp-PullDown registers of Phoenix. For MMC1 Controller, card detect interrupt source is twl6030 which is non-gpio. The card detect call back function provides card present/absent status by reading MMC Control register present on twl6030. Since OMAP4 doesn't use any GPIO line as used in OMAP3 for card detect, the suspend/resume initialization which was done in omap_hsmmc_gpio_init previously is moved to the probe thus making it generic for both OMAP3 & OMAP4. Cc: Tony Lindgren Cc: Andrew Morton Cc: Madhusudhan Chikkature Cc: Adrian Hunter Signed-off-by: Kishore Kadiyala Signed-off-by: Samuel Ortiz --- arch/arm/mach-omap2/board-4430sdp.c | 7 +++- drivers/mfd/twl6030-irq.c | 73 +++++++++++++++++++++++++++++++++++++ drivers/mmc/host/omap_hsmmc.c | 4 +- include/linux/i2c/twl.h | 31 ++++++++++++++++ 4 files changed, 112 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index 69a4ae971e41..df5a425a49d1 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -269,9 +269,14 @@ static int omap4_twl6030_hsmmc_late_init(struct device *dev) struct omap_mmc_platform_data *pdata = dev->platform_data; /* Setting MMC1 Card detect Irq */ - if (pdev->id == 0) + if (pdev->id == 0) { + ret = twl6030_mmc_card_detect_config(); + if (ret) + pr_err("Failed configuring MMC1 card detect\n"); pdata->slots[0].card_detect_irq = TWL6030_IRQ_BASE + MMCDETECT_INTR_OFFSET; + pdata->slots[0].card_detect = twl6030_mmc_card_detect; + } return ret; } diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c index 10bf228ad626..2d3bb82dbf24 100644 --- a/drivers/mfd/twl6030-irq.c +++ b/drivers/mfd/twl6030-irq.c @@ -36,6 +36,7 @@ #include #include #include +#include /* * TWL6030 (unlike its predecessors, which had two level interrupt handling) @@ -223,6 +224,78 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset) } EXPORT_SYMBOL(twl6030_interrupt_mask); +int twl6030_mmc_card_detect_config(void) +{ + int ret; + u8 reg_val = 0; + + /* Unmasking the Card detect Interrupt line for MMC1 from Phoenix */ + twl6030_interrupt_unmask(TWL6030_MMCDETECT_INT_MASK, + REG_INT_MSK_LINE_B); + twl6030_interrupt_unmask(TWL6030_MMCDETECT_INT_MASK, + REG_INT_MSK_STS_B); + /* + * Initially Configuring MMC_CTRL for receiving interrupts & + * Card status on TWL6030 for MMC1 + */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, &reg_val, TWL6030_MMCCTRL); + if (ret < 0) { + pr_err("twl6030: Failed to read MMCCTRL, error
%d\n", ret); + return ret; + } + reg_val &= ~VMMC_AUTO_OFF; + reg_val |= SW_FC; + ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, reg_val, TWL6030_MMCCTRL); + if (ret < 0) { + pr_err("twl6030: Failed to write MMCCTRL, error %d\n", ret); + return ret; + } + + /* Configuring PullUp-PullDown register */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, ®_val, + TWL6030_CFG_INPUT_PUPD3); + if (ret < 0) { + pr_err("twl6030: Failed to read CFG_INPUT_PUPD3, error %d\n", + ret); + return ret; + } + reg_val &= ~(MMC_PU | MMC_PD); + ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, reg_val, + TWL6030_CFG_INPUT_PUPD3); + if (ret < 0) { + pr_err("twl6030: Failed to write CFG_INPUT_PUPD3, error %d\n", + ret); + return ret; + } + return 0; +} +EXPORT_SYMBOL(twl6030_mmc_card_detect_config); + +int twl6030_mmc_card_detect(struct device *dev, int slot) +{ + int ret = -EIO; + u8 read_reg = 0; + struct platform_device *pdev = to_platform_device(dev); + + if (pdev->id) { + /* TWL6030 provide's Card detect support for + * only MMC1 controller. + */ + pr_err("Unkown MMC controller %d in %s\n", pdev->id, __func__); + return ret; + } + /* + * BIT0 of MMC_CTRL on TWL6030 provides card status for MMC1 + * 0 - Card not present ,1 - Card present + */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, &read_reg, + TWL6030_MMCCTRL); + if (ret >= 0) + ret = read_reg & STS_MMC; + return ret; +} +EXPORT_SYMBOL(twl6030_mmc_card_detect); + int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) { diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index e865032a52eb..82a1079bbdc7 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -483,8 +483,6 @@ static int omap_hsmmc_gpio_init(struct omap_mmc_platform_data *pdata) int ret; if (gpio_is_valid(pdata->slots[0].switch_pin)) { - pdata->suspend = omap_hsmmc_suspend_cdirq; - pdata->resume = omap_hsmmc_resume_cdirq; if (pdata->slots[0].cover) pdata->slots[0].get_cover_state = omap_hsmmc_get_cover_state; @@ -2218,6 +2216,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) "Unable to grab MMC CD IRQ\n"); goto err_irq_cd; } + pdata->suspend = omap_hsmmc_suspend_cdirq; + pdata->resume = omap_hsmmc_resume_cdirq; } omap_hsmmc_disable_irq(host); diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 53089516c17a..c760991b354a 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -141,6 +141,16 @@ #define TWL6030_CHARGER_CTRL_INT_MASK 0x10 #define TWL6030_CHARGER_FAULT_INT_MASK 0x60 +#define TWL6030_MMCCTRL 0xEE +#define VMMC_AUTO_OFF (0x1 << 3) +#define SW_FC (0x1 << 2) +#define STS_MMC 0x1 + +#define TWL6030_CFG_INPUT_PUPD3 0xF2 +#define MMC_PU (0x1 << 3) +#define MMC_PD (0x1 << 2) + + #define TWL4030_CLASS_ID 0x4030 #define TWL6030_CLASS_ID 0x6030 @@ -173,6 +183,27 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); int twl6030_interrupt_mask(u8 bit_mask, u8 offset); +/* Card detect Configuration for MMC1 Controller on OMAP4 */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect_config(void); +#else +static inline int twl6030_mmc_card_detect_config(void) +{ + pr_debug("twl6030_mmc_card_detect_config not supported\n"); + return 0; +} +#endif + +/* MMC1 Controller on OMAP4 uses Phoenix irq for Card detect */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect(struct device *dev, int slot); +#else +static inline int twl6030_mmc_card_detect(struct device *dev, int slot) +{ + pr_debug("Call back twl6030_mmc_card_detect 
not supported\n"); + return -EIO; +} +#endif /*----------------------------------------------------------------------*/ /* -- cgit v1.2.3 From 509bd4764c110b89bb3d09a5b6621fd31dc58044 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:24 +0200 Subject: mfd: Support for ICs compliant with max8998 Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 5 +++-- include/linux/mfd/max8998-private.h | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 310fd8054f35..a720f412cd15 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -133,6 +133,7 @@ static int max8998_i2c_probe(struct i2c_client *i2c, max8998->dev = &i2c->dev; max8998->i2c = i2c; max8998->irq = i2c->irq; + max8998->type = id->driver_data; if (pdata) { max8998->ono = pdata->ono; max8998->irq_base = pdata->irq_base; @@ -169,8 +170,8 @@ static int max8998_i2c_remove(struct i2c_client *i2c) } static const struct i2c_device_id max8998_i2c_id[] = { - { "max8998", 0 }, - { "lp3974", 0 }, + { "max8998", TYPE_MAX8998 }, + { "lp3974", TYPE_LP3974}, { } }; MODULE_DEVICE_TABLE(i2c, max8998_i2c_id); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 170f665c7cdd..0ff42116d5dd 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -101,6 +101,13 @@ enum { MAX8998_IRQ_NR, }; +/* MAX8998 various variants */ +enum { + TYPE_MAX8998 = 0, /* Default */ + TYPE_LP3974, /* National version of MAX8998 */ + TYPE_LP3979, /* Added AVS */ +}; + #define MAX8998_IRQ_DCINF_MASK (1 << 2) #define MAX8998_IRQ_DCINR_MASK (1 << 3) #define MAX8998_IRQ_JIGF_MASK (1 << 4) @@ -123,6 +130,8 @@ enum { #define MAX8998_IRQ_LOBAT1_MASK (1 << 0) #define MAX8998_IRQ_LOBAT2_MASK (1 << 1) +#define MAX8998_ENRAMP (1 << 4) + /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) @@ -135,6 +144,7 @@ enum { * @ono: power onoff IRQ number for max8998 * @irq_masks_cur: currently active value * @irq_masks_cache: cached hardware value + * @type: indicate which max8998 "variant" is used */ struct max8998_dev { struct device *dev; @@ -148,6 +158,7 @@ struct max8998_dev { int ono; u8 irq_masks_cur[MAX8998_NUM_IRQ_REGS]; u8 irq_masks_cache[MAX8998_NUM_IRQ_REGS]; + int type; }; int max8998_irq_init(struct max8998_dev *max8998); -- cgit v1.2.3 From 889cd5a60f880e0a56b7b769d0b74eb222e6896c Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:25 +0200 Subject: regulator: max8998 BUCK1/2 internal voltages and indexes defined BUCK1/2 internal voltages and indexes defined in the struct max8998_data max_get_voltage_register now uses index values to chose proper register More generic BUCK1/2 registers names provided Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- drivers/regulator/max8998.c | 10 ++++++++-- include/linux/mfd/max8998-private.h | 12 ++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c index 1e5bd504fbad..7ae0639c1394 100644 --- a/drivers/regulator/max8998.c +++ b/drivers/regulator/max8998.c @@ -39,6 +39,11 @@ struct max8998_data { struct max8998_dev *iodev; int 
num_regulators; struct regulator_dev **rdev; + u8 buck1_vol[4]; /* voltages for selection */ + u8 buck2_vol[2]; + unsigned int buck1_idx; /* index to last changed voltage */ + /* value in a set */ + unsigned int buck2_idx; }; struct voltage_map_desc { @@ -218,6 +223,7 @@ static int max8998_get_voltage_register(struct regulator_dev *rdev, int *_reg, int *_shift, int *_mask) { int ldo = max8998_get_ldo(rdev); + struct max8998_data *max8998 = rdev_get_drvdata(rdev); int reg, shift = 0, mask = 0xff; switch (ldo) { @@ -254,10 +260,10 @@ static int max8998_get_voltage_register(struct regulator_dev *rdev, reg = MAX8998_REG_LDO12 + (ldo - MAX8998_LDO12); break; case MAX8998_BUCK1: - reg = MAX8998_REG_BUCK1_DVSARM1; + reg = MAX8998_REG_BUCK1_VOLTAGE1 + max8998->buck1_idx; break; case MAX8998_BUCK2: - reg = MAX8998_REG_BUCK2_DVSINT1; + reg = MAX8998_REG_BUCK2_VOLTAGE1 + max8998->buck2_idx; break; case MAX8998_BUCK3: reg = MAX8998_REG_BUCK3; diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 0ff42116d5dd..7363dea6bbcd 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -48,12 +48,12 @@ enum { MAX8998_REG_ONOFF2, MAX8998_REG_ONOFF3, MAX8998_REG_ONOFF4, - MAX8998_REG_BUCK1_DVSARM1, - MAX8998_REG_BUCK1_DVSARM2, - MAX8998_REG_BUCK1_DVSARM3, - MAX8998_REG_BUCK1_DVSARM4, - MAX8998_REG_BUCK2_DVSINT1, - MAX8998_REG_BUCK2_DVSINT2, + MAX8998_REG_BUCK1_VOLTAGE1, + MAX8998_REG_BUCK1_VOLTAGE2, + MAX8998_REG_BUCK1_VOLTAGE3, + MAX8998_REG_BUCK1_VOLTAGE4, + MAX8998_REG_BUCK2_VOLTAGE1, + MAX8998_REG_BUCK2_VOLTAGE2, MAX8998_REG_BUCK3, MAX8998_REG_BUCK4, MAX8998_REG_LDO2_LDO3, -- cgit v1.2.3 From 58aa6334fbf5cf420a47cfd2718a0b299f40a379 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:26 +0200 Subject: mfd: Voltages and GPIOs platform_data definitions for max8998 Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- include/linux/mfd/max8998.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index d47ed4c190fe..f8c9f884aff2 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -70,12 +70,24 @@ struct max8998_regulator_data { * @num_regulators: number of regulators used * @irq_base: base IRQ number for max8998, required for IRQs * @ono: power onoff IRQ number for max8998 + * @buck1_max_voltage1: BUCK1 maximum allowed voltage register 1 + * @buck1_max_voltage2: BUCK1 maximum allowed voltage register 2 + * @buck2_max_voltage: BUCK2 maximum allowed voltage + * @buck1_set1: BUCK1 gpio pin 1 to set output voltage + * @buck1_set2: BUCK1 gpio pin 2 to set output voltage + * @buck2_set3: BUCK2 gpio pin to set output voltage */ struct max8998_platform_data { struct max8998_regulator_data *regulators; int num_regulators; int irq_base; int ono; + int buck1_max_voltage1; + int buck1_max_voltage2; + int buck2_max_voltage; + int buck1_set1; + int buck1_set2; + int buck2_set3; }; #endif /* __LINUX_MFD_MAX8998_H */ -- cgit v1.2.3 From e5b486841d572c5ac83c798f82f4f67cbbac5320 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2010 23:57:56 +0200 Subject: mfd: Factor out WM831x I2C I/O from the core driver In preparation for the addition of SPI support for the WM831x move the I2C specific code into a separate file with a separate Kconfig option so the I2C support can be excluded from the build.
Also update the 1133-EV1 PMIC module support for SMDK6410 to use the new symbol. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- arch/arm/mach-s3c64xx/Kconfig | 1 + drivers/mfd/Kconfig | 15 +++-- drivers/mfd/Makefile | 1 + drivers/mfd/wm831x-core.c | 138 ++------------------------------------ drivers/mfd/wm831x-i2c.c | 143 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/wm831x/core.h | 12 ++++ 6 files changed, 171 insertions(+), 139 deletions(-) create mode 100644 drivers/mfd/wm831x-i2c.c (limited to 'include') diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 1e4d78af7d84..546db5cb8929 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -185,6 +185,7 @@ config SMDK6410_WM1192_EV1 select REGULATOR_WM831X select S3C24XX_GPIO_EXTRA64 select MFD_WM831X + select MFD_WM831X_I2C help The Wolfson Microelectronics 1192-EV1 is a WM831x based PMIC daughtercard for the Samsung SMDK6410 reference platform. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 6c6b9f02d177..2fb1e892331c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -315,14 +315,19 @@ config MFD_WM8400 the functionality of the device. config MFD_WM831X - bool "Support Wolfson Microelectronics WM831x/2x PMICs" + bool + depends on GENERIC_HARDIRQS + +config MFD_WM831X_I2C + bool "Support Wolfson Microelectronics WM831x/2x PMICs with I2C" select MFD_CORE + select MFD_WM831X depends on I2C=y && GENERIC_HARDIRQS help - Support for the Wolfson Microelectronics WM831x and WM832x PMICs. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. + Support for the Wolfson Microelectronics WM831x and WM832x PMICs + when controlled using I2C. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. config MFD_WM8350 bool diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 70b26999dc81..c9ef41b13bf3 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o tmio_core.o obj-$(CONFIG_MFD_WM8400) += wm8400-core.o wm831x-objs := wm831x-core.o wm831x-irq.o wm831x-otp.o obj-$(CONFIG_MFD_WM831X) += wm831x.o +obj-$(CONFIG_MFD_WM831X_I2C) += wm831x-i2c.o wm8350-objs := wm8350-core.o wm8350-regmap.o wm8350-gpio.o wm8350-objs += wm8350-irq.o obj-$(CONFIG_MFD_WM8350) += wm8350.o diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index ad36579bc815..7d2563fc15c6 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -90,15 +89,6 @@ int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = { }; EXPORT_SYMBOL_GPL(wm831x_isinkv_values); -enum wm831x_parent { - WM8310 = 0x8310, - WM8311 = 0x8311, - WM8312 = 0x8312, - WM8320 = 0x8320, - WM8321 = 0x8321, - WM8325 = 0x8325, -}; - static int wm831x_reg_locked(struct wm831x *wm831x, unsigned short reg) { if (!wm831x->locked) @@ -1447,7 +1437,7 @@ static struct mfd_cell backlight_devs[] = { /* * Instantiate the generic non-control parts of the device.
*/ -static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) +int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) { struct wm831x_pdata *pdata = wm831x->dev->platform_data; int rev; @@ -1673,7 +1663,7 @@ err: return ret; } -static void wm831x_device_exit(struct wm831x *wm831x) +void wm831x_device_exit(struct wm831x *wm831x) { wm831x_otp_exit(wm831x); mfd_remove_devices(wm831x->dev); @@ -1683,7 +1673,7 @@ static void wm831x_device_exit(struct wm831x *wm831x) kfree(wm831x); } -static int wm831x_device_suspend(struct wm831x *wm831x) +int wm831x_device_suspend(struct wm831x *wm831x) { int reg, mask; @@ -1719,126 +1709,6 @@ static int wm831x_device_suspend(struct wm831x *wm831x) return 0; } -static int wm831x_i2c_read_device(struct wm831x *wm831x, unsigned short reg, - int bytes, void *dest) -{ - struct i2c_client *i2c = wm831x->control_data; - int ret; - u16 r = cpu_to_be16(reg); - - ret = i2c_master_send(i2c, (unsigned char *)&r, 2); - if (ret < 0) - return ret; - if (ret != 2) - return -EIO; - - ret = i2c_master_recv(i2c, dest, bytes); - if (ret < 0) - return ret; - if (ret != bytes) - return -EIO; - return 0; -} - -/* Currently we allocate the write buffer on the stack; this is OK for - * small writes - if we need to do large writes this will need to be - * revised. - */ -static int wm831x_i2c_write_device(struct wm831x *wm831x, unsigned short reg, - int bytes, void *src) -{ - struct i2c_client *i2c = wm831x->control_data; - unsigned char msg[bytes + 2]; - int ret; - - reg = cpu_to_be16(reg); - memcpy(&msg[0], &reg, 2); - memcpy(&msg[2], src, bytes); - - ret = i2c_master_send(i2c, msg, bytes + 2); - if (ret < 0) - return ret; - if (ret < bytes + 2) - return -EIO; - - return 0; -} - -static int wm831x_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) -{ - struct wm831x *wm831x; - - wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); - if (wm831x == NULL) - return -ENOMEM; - - i2c_set_clientdata(i2c, wm831x); - wm831x->dev = &i2c->dev; - wm831x->control_data = i2c; - wm831x->read_dev = wm831x_i2c_read_device; - wm831x->write_dev = wm831x_i2c_write_device; - - return wm831x_device_init(wm831x, id->driver_data, i2c->irq); -} - -static int wm831x_i2c_remove(struct i2c_client *i2c) -{ - struct wm831x *wm831x = i2c_get_clientdata(i2c); - - wm831x_device_exit(wm831x); - - return 0; -} - -static int wm831x_i2c_suspend(struct i2c_client *i2c, pm_message_t mesg) -{ - struct wm831x *wm831x = i2c_get_clientdata(i2c); - - return wm831x_device_suspend(wm831x); -} - -static const struct i2c_device_id wm831x_i2c_id[] = { - { "wm8310", WM8310 }, - { "wm8311", WM8311 }, - { "wm8312", WM8312 }, - { "wm8320", WM8320 }, - { "wm8321", WM8321 }, - { "wm8325", WM8325 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, wm831x_i2c_id); - - -static struct i2c_driver wm831x_i2c_driver = { - .driver = { - .name = "wm831x", - .owner = THIS_MODULE, - }, - .probe = wm831x_i2c_probe, - .remove = wm831x_i2c_remove, - .suspend = wm831x_i2c_suspend, - .id_table = wm831x_i2c_id, -}; - -static int __init wm831x_i2c_init(void) -{ - int ret; - - ret = i2c_add_driver(&wm831x_i2c_driver); - if (ret != 0) - pr_err("Failed to register wm831x I2C driver: %d\n", ret); - - return ret; -} -subsys_initcall(wm831x_i2c_init); - -static void __exit wm831x_i2c_exit(void) -{ - i2c_del_driver(&wm831x_i2c_driver); -} -module_exit(wm831x_i2c_exit); - -MODULE_DESCRIPTION("I2C support for the WM831X AudioPlus PMIC"); +MODULE_DESCRIPTION("Core support for the WM831X AudioPlus PMIC");
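With the I2C specifics gone, the core is bus-agnostic: a backend only has to fill in the read_dev/write_dev callbacks and hand control to wm831x_device_init(). As a rough sketch of what the planned SPI support could look like (hypothetical code, assuming wm831x_spi_read_device()/wm831x_spi_write_device() helpers that frame registers the way the I2C accessors in the new file below do):

        static int wm831x_spi_probe(struct spi_device *spi)
        {
                const struct spi_device_id *id = spi_get_device_id(spi);
                struct wm831x *wm831x;

                wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL);
                if (wm831x == NULL)
                        return -ENOMEM;

                spi_set_drvdata(spi, wm831x);
                wm831x->dev = &spi->dev;
                wm831x->control_data = spi;
                /* assumed SPI equivalents of the I2C accessors */
                wm831x->read_dev = wm831x_spi_read_device;
                wm831x->write_dev = wm831x_spi_write_device;

                return wm831x_device_init(wm831x, id->driver_data, spi->irq);
        }

The only bus-specific knowledge lives behind the two function pointers; IRQ handling, OTP and subdevice registration all stay in the shared core.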
MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mark Brown"); diff --git a/drivers/mfd/wm831x-i2c.c b/drivers/mfd/wm831x-i2c.c new file mode 100644 index 000000000000..156b19859e81 --- /dev/null +++ b/drivers/mfd/wm831x-i2c.c @@ -0,0 +1,143 @@ +/* + * wm831x-i2c.c -- I2C access for Wolfson WM831x PMICs + * + * Copyright 2009,2010 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static int wm831x_i2c_read_device(struct wm831x *wm831x, unsigned short reg, + int bytes, void *dest) +{ + struct i2c_client *i2c = wm831x->control_data; + int ret; + u16 r = cpu_to_be16(reg); + + ret = i2c_master_send(i2c, (unsigned char *)&r, 2); + if (ret < 0) + return ret; + if (ret != 2) + return -EIO; + + ret = i2c_master_recv(i2c, dest, bytes); + if (ret < 0) + return ret; + if (ret != bytes) + return -EIO; + return 0; +} + +/* Currently we allocate the write buffer on the stack; this is OK for + * small writes - if we need to do large writes this will need to be + * revised. + */ +static int wm831x_i2c_write_device(struct wm831x *wm831x, unsigned short reg, + int bytes, void *src) +{ + struct i2c_client *i2c = wm831x->control_data; + unsigned char msg[bytes + 2]; + int ret; + + reg = cpu_to_be16(reg); + memcpy(&msg[0], ®, 2); + memcpy(&msg[2], src, bytes); + + ret = i2c_master_send(i2c, msg, bytes + 2); + if (ret < 0) + return ret; + if (ret < bytes + 2) + return -EIO; + + return 0; +} + +static int wm831x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm831x *wm831x; + + wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); + if (wm831x == NULL) + return -ENOMEM; + + i2c_set_clientdata(i2c, wm831x); + wm831x->dev = &i2c->dev; + wm831x->control_data = i2c; + wm831x->read_dev = wm831x_i2c_read_device; + wm831x->write_dev = wm831x_i2c_write_device; + + return wm831x_device_init(wm831x, id->driver_data, i2c->irq); +} + +static int wm831x_i2c_remove(struct i2c_client *i2c) +{ + struct wm831x *wm831x = i2c_get_clientdata(i2c); + + wm831x_device_exit(wm831x); + + return 0; +} + +static int wm831x_i2c_suspend(struct i2c_client *i2c, pm_message_t mesg) +{ + struct wm831x *wm831x = i2c_get_clientdata(i2c); + + return wm831x_device_suspend(wm831x); +} + +static const struct i2c_device_id wm831x_i2c_id[] = { + { "wm8310", WM8310 }, + { "wm8311", WM8311 }, + { "wm8312", WM8312 }, + { "wm8320", WM8320 }, + { "wm8321", WM8321 }, + { "wm8325", WM8325 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm831x_i2c_id); + + +static struct i2c_driver wm831x_i2c_driver = { + .driver = { + .name = "wm831x", + .owner = THIS_MODULE, + }, + .probe = wm831x_i2c_probe, + .remove = wm831x_i2c_remove, + .suspend = wm831x_i2c_suspend, + .id_table = wm831x_i2c_id, +}; + +static int __init wm831x_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&wm831x_i2c_driver); + if (ret != 0) + pr_err("Failed to register wm831x I2C driver: %d\n", ret); + + return ret; +} +subsys_initcall(wm831x_i2c_init); + +static void __exit wm831x_i2c_exit(void) +{ + i2c_del_driver(&wm831x_i2c_driver); +} +module_exit(wm831x_i2c_exit); diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index eb5bd4e0e03c..a1239c48b41a 100644 --- a/include/linux/mfd/wm831x/core.h +++ 
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index eb5bd4e0e03c..a1239c48b41a 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -238,6 +238,15 @@ struct regulator_dev; #define WM831X_NUM_IRQ_REGS 5 +enum wm831x_parent { + WM8310 = 0x8310, + WM8311 = 0x8311, + WM8312 = 0x8312, + WM8320 = 0x8320, + WM8321 = 0x8321, + WM8325 = 0x8325, +}; + struct wm831x { struct mutex io_lock; @@ -285,6 +294,9 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg, int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); +int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq); +void wm831x_device_exit(struct wm831x *wm831x); +int wm831x_device_suspend(struct wm831x *wm831x); int wm831x_irq_init(struct wm831x *wm831x, int irq); void wm831x_irq_exit(struct wm831x *wm831x); -- cgit v1.2.3 From c96e41e92b4aaf11e1f9775ecf0d1c8cbff829ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:17:56 +0400 Subject: beginning of transition: ->mount() eventual replacement for ->get_sb() - does *not* get vfsmount, returns ERR_PTR(error) or root of subtree to be mounted. Signed-off-by: Al Viro --- fs/super.c | 17 ++++++++++++++--- include/linux/fs.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index b9c9869165db..00a2c9662b55 100644 --- a/fs/super.c +++ b/fs/super.c @@ -918,6 +918,7 @@ struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; + struct dentry *root; char *secdata = NULL; int error; @@ -942,9 +943,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void goto out_free_secdata; } - error = type->get_sb(type, flags, name, data, mnt); - if (error < 0) - goto out_free_secdata; + if (type->mount) { + root = type->mount(type, flags, name, data); + if (IS_ERR(root)) { + error = PTR_ERR(root); + goto out_free_secdata; + } + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + } else { + error = type->get_sb(type, flags, name, data, mnt); + if (error < 0) + goto out_free_secdata; + } BUG_ON(!mnt->mnt_sb); WARN_ON(!mnt->mnt_sb->s_bdi); mnt->mnt_sb->s_flags |= MS_BORN; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c73b50e81ff..c6b474311690 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1772,6 +1772,8 @@ struct file_system_type { int fs_flags; int (*get_sb) (struct file_system_type *, int, const char *, void *, struct vfsmount *); + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; -- cgit v1.2.3
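Seen from the callee's side, the new contract is: return the root dentry of the subtree to be mounted, or an ERR_PTR(), and never touch a vfsmount. A minimal sketch, not taken from any commit here - examplefs and examplefs_fill_super are invented names, while sget(), set_anon_super(), the MS_SILENT/MS_ACTIVE handling and the dget(s->s_root) return mirror the in-tree patterns of this period:

	static struct dentry *examplefs_mount(struct file_system_type *fs_type,
					      int flags, const char *dev_name,
					      void *data)
	{
		struct super_block *s;
		int err;

		s = sget(fs_type, NULL, set_anon_super, NULL);
		if (IS_ERR(s))
			return ERR_CAST(s);	/* errors travel as ERR_PTR() */

		s->s_flags = flags;
		err = examplefs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(s);
			return ERR_PTR(err);
		}
		s->s_flags |= MS_ACTIVE;

		/* vfs_kern_mount() now derives mnt_root and mnt_sb itself */
		return dget(s->s_root);
	}

	static struct file_system_type examplefs_type = {
		.owner	 = THIS_MODULE,
		.name	 = "examplefs",
		.mount	 = examplefs_mount,
		.kill_sb = kill_anon_super,
	};

mount_bdev(), mount_mtd() and mount_single() in the commits that follow are exactly this pattern factored out for the common backends.

From 152a08366671080f27b32e0c411ad620c5f88b57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:46:55 +0400 Subject: new helper: mount_bdev() ...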
and switch of the obvious get_sb_bdev() users to ->mount() Signed-off-by: Al Viro --- fs/adfs/super.c | 9 ++++----- fs/affs/super.c | 9 ++++----- fs/befs/linuxvfs.c | 11 +++++------ fs/bfs/inode.c | 8 ++++---- fs/cramfs/inode.c | 9 ++++----- fs/efs/super.c | 8 ++++---- fs/ext2/super.c | 8 ++++---- fs/ext3/super.c | 8 ++++---- fs/ext4/super.c | 16 ++++++++-------- fs/fat/namei_msdos.c | 9 ++++----- fs/fat/namei_vfat.c | 9 ++++----- fs/freevxfs/vxfs_super.c | 9 ++++----- fs/fuse/inode.c | 9 ++++----- fs/hfs/super.c | 9 ++++----- fs/hfsplus/super.c | 10 ++++------ fs/hpfs/super.c | 9 ++++----- fs/isofs/inode.c | 9 ++++----- fs/jfs/super.c | 9 ++++----- fs/minix/inode.c | 9 ++++----- fs/ntfs/super.c | 9 ++++----- fs/ocfs2/super.c | 11 ++++------- fs/omfs/inode.c | 9 ++++----- fs/qnx4/inode.c | 9 ++++----- fs/reiserfs/super.c | 9 ++++----- fs/squashfs/super.c | 10 ++++------ fs/super.c | 28 +++++++++++++++++++++------- fs/sysv/super.c | 17 ++++++++--------- fs/udf/super.c | 9 ++++----- fs/ufs/super.c | 8 ++++---- fs/xfs/linux-2.6/xfs_super.c | 12 +++++------- include/linux/fs.h | 3 +++ 31 files changed, 150 insertions(+), 161 deletions(-) (limited to 'include') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index d9803f73236f..959dbff2d42d 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -490,17 +490,16 @@ error: return -EINVAL; } -static int adfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *adfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super); } static struct file_system_type adfs_fs_type = { .owner = THIS_MODULE, .name = "adfs", - .get_sb = adfs_get_sb, + .mount = adfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/affs/super.c b/fs/affs/super.c index fa4fbe1e238a..0cf7f4384cbd 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int affs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *affs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super); } static struct file_system_type affs_fs_type = { .owner = THIS_MODULE, .name = "affs", - .get_sb = affs_get_sb, + .mount = affs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dc39d2824885..aa4e7c7ae3c6 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int -befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) +static struct dentry * +befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super); } static struct file_system_type befs_fs_type = { .owner = THIS_MODULE, .name = "befs", - .get_sb = befs_get_sb, + .mount = befs_mount, .kill_sb = kill_block_super, .fs_flags 
= FS_REQUIRES_DEV, }; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 883e77acd5a8..76db6d7d49bb 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -450,16 +450,16 @@ out: return ret; } -static int bfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super); } static struct file_system_type bfs_fs_type = { .owner = THIS_MODULE, .name = "bfs", - .get_sb = bfs_get_sb, + .mount = bfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 1e7a33028d33..32fd5fe9ca0e 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; -static int cramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *cramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); } static struct file_system_type cramfs_fs_type = { .owner = THIS_MODULE, .name = "cramfs", - .get_sb = cramfs_get_sb, + .mount = cramfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/efs/super.c b/fs/efs/super.c index f04942810818..5073a07652cc 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -20,16 +20,16 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); static int efs_fill_super(struct super_block *s, void *d, int silent); -static int efs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *efs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super); } static struct file_system_type efs_fs_type = { .owner = THIS_MODULE, .name = "efs", - .get_sb = efs_get_sb, + .mount = efs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 0901320671da..d89e0b6a2d78 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) return 0; } -static int ext2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext2_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super); } #ifdef CONFIG_QUOTA @@ -1473,7 +1473,7 @@ out: static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .get_sb = ext2_get_sb, + .mount = ext2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index db87413d3479..2fedaf8b5012 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -3020,16 +3020,16 @@ out: #endif -static int ext3_get_sb(struct 
file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext3_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super); } static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", - .get_sb = ext3_get_sb, + .mount = ext3_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0348ce066592..40131b777af6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -73,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); -static int ext4_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt); +static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); @@ -82,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb); static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -4667,17 +4667,17 @@ out: #endif -static int ext4_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -4722,7 +4722,7 @@ static inline void unregister_as_ext3(void) { } static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, .name = "ext4", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index bbca5c186ae7..3345aabd1dd7 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int msdos_get_sb(struct file_system_type *fs_type, +static struct dentry *msdos_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super); } static struct file_system_type msdos_fs_type = { .owner = THIS_MODULE, .name = "msdos", - .get_sb = msdos_get_sb, + .mount = msdos_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6f0f6c9a0152..b936703b8924 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1071,18 +1071,17 @@ static int 
vfat_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int vfat_get_sb(struct file_system_type *fs_type, +static struct dentry *vfat_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super); } static struct file_system_type vfat_fs_type = { .owner = THIS_MODULE, .name = "vfat", - .get_sb = vfat_get_sb, + .mount = vfat_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 71b0148b8784..9d1c99558389 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -246,17 +246,16 @@ out: /* * The usual module blurb. */ -static int vxfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *vxfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); } static struct file_system_type vxfs_fs_type = { .owner = THIS_MODULE, .name = "vxfs", - .get_sb = vxfs_get_sb, + .mount = vxfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index da9e6e11374c..edf6a1843533 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1070,12 +1070,11 @@ static struct file_system_type fuse_fs_type = { }; #ifdef CONFIG_BLOCK -static int fuse_get_sb_blk(struct file_system_type *fs_type, +static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data, struct vfsmount *mnt) + void *raw_data) { - return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); } static void fuse_kill_sb_blk(struct super_block *sb) @@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb) static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", - .get_sb = fuse_get_sb_blk, + .mount = fuse_mount_blk, .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6ee1586f2334..4824c27cebb8 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -441,17 +441,16 @@ bail: return res; } -static int hfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *hfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super); } static struct file_system_type hfs_fs_type = { .owner = THIS_MODULE, .name = "hfs", - .get_sb = hfs_get_sb, + .mount = hfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9a88d7536103..52cc746d3ba3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode) #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) -static int hfsplus_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - 
struct vfsmount *mnt) +static struct dentry *hfsplus_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); } static struct file_system_type hfsplus_fs_type = { .owner = THIS_MODULE, .name = "hfsplus", - .get_sb = hfsplus_get_sb, + .mount = hfsplus_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index c969a1aa163a..bb69389972eb 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -686,17 +686,16 @@ bail0: return -EINVAL; } -static int hpfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *hpfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); } static struct file_system_type hpfs_fs_type = { .owner = THIS_MODULE, .name = "hpfs", - .get_sb = hpfs_get_sb, + .mount = hpfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 79cf7f616bbe..bfdeb82a53be 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1507,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -static int isofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *isofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } static struct file_system_type iso9660_fs_type = { .owner = THIS_MODULE, .name = "iso9660", - .get_sb = isofs_get_sb, + .mount = isofs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 68eee2bf629e..0669fc1cc3bf 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb) return 0; } -static int jfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *jfs_do_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super); } static int jfs_sync_fs(struct super_block *sb, int wait) @@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = { static struct file_system_type jfs_fs_type = { .owner = THIS_MODULE, .name = "jfs", - .get_sb = jfs_get_sb, + .mount = jfs_do_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e39d6bf2e8fb..fb2020858a34 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode) V2_minix_truncate(inode); } -static int minix_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *minix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, 
data, minix_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); } static struct file_system_type minix_fs_type = { .owner = THIS_MODULE, .name = "minix", - .get_sb = minix_get_sb, + .mount = minix_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d3fbe5730bfc..a30ecacc01f2 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache; /* Driver wide mutex. */ DEFINE_MUTEX(ntfs_lock); -static int ntfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ntfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); } static struct file_system_type ntfs_fs_type = { .owner = THIS_MODULE, .name = "ntfs", - .get_sb = ntfs_get_sb, + .mount = ntfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 56f0cb395820..f02c0ef31578 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1236,14 +1236,12 @@ read_super_error: return status; } -static int ocfs2_get_sb(struct file_system_type *fs_type, +static struct dentry *ocfs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } static void ocfs2_kill_sb(struct super_block *sb) @@ -1267,8 +1265,7 @@ out: static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", - .get_sb = ocfs2_get_sb, /* is this called when we mount - * the fs? 
*/ + .mount = ocfs2_mount, .kill_sb = ocfs2_kill_sb, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 14a22863291a..e043c4cb9a97 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -557,17 +557,16 @@ end: return ret; } -static int omfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *m) +static struct dentry *omfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); + return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super); } static struct file_system_type omfs_fs_type = { .owner = THIS_MODULE, .name = "omfs", - .get_sb = omfs_get_sb, + .mount = omfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 01bad30026fc..fcada42f1aa3 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -454,17 +454,16 @@ static void destroy_inodecache(void) kmem_cache_destroy(qnx4_inode_cachep); } -static int qnx4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qnx4_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); } static struct file_system_type qnx4_fs_type = { .owner = THIS_MODULE, .name = "qnx4", - .get_sb = qnx4_get_sb, + .mount = qnx4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e15ff612002d..3bf7a6457f4d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2213,12 +2213,11 @@ out: #endif -static int get_super_block(struct file_system_type *fs_type, +static struct dentry *get_super_block(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); } static int __init init_reiserfs_fs(void) @@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void) struct file_system_type reiserfs_fs_type = { .owner = THIS_MODULE, .name = "reiserfs", - .get_sb = get_super_block, + .mount = get_super_block, .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 07a4f1156048..24de30ba34c1 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb) } -static int squashfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); } @@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode) static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, .name = "squashfs", - .get_sb = squashfs_get_sb, + .mount = squashfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV }; diff --git a/fs/super.c b/fs/super.c index 00a2c9662b55..40989e9a2606 100644 
--- a/fs/super.c +++ b/fs/super.c @@ -762,10 +762,9 @@ static int test_bdev_super(struct super_block *s, void *data) return (void *)s->s_bdev == data; } -int get_sb_bdev(struct file_system_type *fs_type, +struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct block_device *bdev; struct super_block *s; @@ -777,7 +776,7 @@ int get_sb_bdev(struct file_system_type *fs_type, bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(bdev)) - return PTR_ERR(bdev); + return ERR_CAST(bdev); /* * once the super is inserted into the list by sget, s_umount @@ -829,15 +828,30 @@ int get_sb_bdev(struct file_system_type *fs_type, bdev->bd_super = s; } - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); error_s: error = PTR_ERR(s); error_bdev: close_bdev_exclusive(bdev, mode); error: - return error; + return ERR_PTR(error); +} +EXPORT_SYMBOL(mount_bdev); + +int get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + + root = mount_bdev(fs_type, flags, dev_name, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + return 0; } EXPORT_SYMBOL(get_sb_bdev); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index a0b0cda6927e..3d9c62be0c10 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -526,23 +526,22 @@ failed: /* Every kernel module contains stuff like this. */ -static int sysv_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *sysv_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super); } -static int v7_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *v7_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super); } static struct file_system_type sysv_fs_type = { .owner = THIS_MODULE, .name = "sysv", - .get_sb = sysv_get_sb, + .mount = sysv_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = { static struct file_system_type v7_fs_type = { .owner = THIS_MODULE, .name = "v7", - .get_sb = v7_get_sb, + .mount = v7_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/udf/super.c b/fs/udf/super.c index 76f3d6d97b40..4a5c7c61836a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) } /* UDF filesystem type */ -static int udf_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *udf_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, 
udf_fill_super); } static struct file_system_type udf_fstype = { .owner = THIS_MODULE, .name = "udf", - .get_sb = udf_get_sb, + .mount = udf_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6b9be90dae7d..2c47daed56da 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = { .show_options = ufs_show_options, }; -static int ufs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ufs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super); } static struct file_system_type ufs_fs_type = { .owner = THIS_MODULE, .name = "ufs", - .get_sb = ufs_get_sb, + .mount = ufs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cf808782c065..9f3a78fe6ae4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1609,16 +1609,14 @@ xfs_fs_fill_super( goto out_free_sb; } -STATIC int -xfs_fs_get_sb( +STATIC struct dentry * +xfs_fs_mount( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } static const struct super_operations xfs_super_operations = { @@ -1639,7 +1637,7 @@ static const struct super_operations xfs_super_operations = { static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .get_sb = xfs_fs_get_sb, + .mount = xfs_fs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index c6b474311690..2fab5a24ca51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1792,6 +1792,9 @@ struct file_system_type { extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int), -- cgit v1.2.3
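For reference, here is the other half of the mount_bdev() contract, which none of the hunks above spell out: the fill_super callback it takes. A hedged sketch with invented names and a fake magic number; a real filesystem would read and validate its on-disk superblock through sb->s_bdev, while the libfs helpers and d_alloc_root() are the APIs of this era (d_make_root() did not exist yet):

	static int examplefs_fill_super(struct super_block *sb, void *data,
					int silent)
	{
		struct inode *root;

		sb->s_magic = 0x12345678;	/* invented magic number */

		root = new_inode(sb);
		if (!root)
			return -ENOMEM;
		root->i_ino = 1;
		root->i_mode = S_IFDIR | 0755;
		root->i_op = &simple_dir_inode_operations;
		root->i_fop = &simple_dir_operations;

		sb->s_root = d_alloc_root(root);
		if (!sb->s_root) {
			iput(root);
			return -ENOMEM;
		}
		return 0;
	}

From 848b83a59b772b8f102bc5e3f1187c2fa5676959 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:56:46 +0400 Subject: convert get_sb_mtd() users to ->mount() Signed-off-by: Al Viro --- drivers/mtd/mtdsuper.c | 54 +++++++++++++++++++---------------------- fs/jffs2/super.c | 9 ++++---- fs/romfs/super.c | 17 +++++++-------- include/linux/mtd/super.h | 5 ++--- 4 files changed, 36 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 38e2ab07e7a3..16b02a1fc100 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -54,11 +54,10 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd) /* * get a superblock on an MTD-backed filesystem */ -static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags, +static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags,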
const char *dev_name, void *data, struct mtd_info *mtd, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct super_block *sb; int ret; @@ -79,57 +78,49 @@ static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags, ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); - return ret; + return ERR_PTR(ret); } /* go */ sb->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, sb); - - return 0; + return dget(sb->s_root); /* new mountpoint for an already mounted superblock */ already_mounted: DEBUG(1, "MTDSB: Device %d (\"%s\") is already mounted\n", mtd->index, mtd->name); - simple_set_mnt(mnt, sb); - ret = 0; - goto out_put; + put_mtd_device(mtd); + return dget(sb->s_root); out_error: - ret = PTR_ERR(sb); -out_put: put_mtd_device(mtd); - return ret; + return ERR_CAST(sb); } /* * get a superblock on an MTD-backed filesystem by MTD device number */ -static int get_sb_mtd_nr(struct file_system_type *fs_type, int flags, +static struct dentry *mount_mtd_nr(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int mtdnr, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct mtd_info *mtd; mtd = get_mtd_device(NULL, mtdnr); if (IS_ERR(mtd)) { DEBUG(0, "MTDSB: Device #%u doesn't appear to exist\n", mtdnr); - return PTR_ERR(mtd); + return ERR_CAST(mtd); } - return get_sb_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super, - mnt); + return mount_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super); } /* * set up an MTD-based superblock */ -int get_sb_mtd(struct file_system_type *fs_type, int flags, +struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { #ifdef CONFIG_BLOCK struct block_device *bdev; @@ -138,7 +129,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, int mtdnr; if (!dev_name) - return -EINVAL; + return ERR_PTR(-EINVAL); DEBUG(2, "MTDSB: dev_name \"%s\"\n", dev_name); @@ -156,10 +147,10 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, mtd = get_mtd_device_nm(dev_name + 4); if (!IS_ERR(mtd)) - return get_sb_mtd_aux( + return mount_mtd_aux( fs_type, flags, dev_name, data, mtd, - fill_super, mnt); + fill_super); printk(KERN_NOTICE "MTD:" " MTD device with name \"%s\" not found.\n", @@ -174,9 +165,9 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, /* It was a valid number */ DEBUG(1, "MTDSB: mtd%%d, mtdnr %d\n", mtdnr); - return get_sb_mtd_nr(fs_type, flags, + return mount_mtd_nr(fs_type, flags, dev_name, data, - mtdnr, fill_super, mnt); + mtdnr, fill_super); } } } @@ -189,7 +180,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); DEBUG(1, "MTDSB: lookup_bdev() returned %d\n", ret); - return ret; + return ERR_PTR(ret); } DEBUG(1, "MTDSB: lookup_bdev() returned 0\n"); @@ -202,8 +193,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, if (major != MTD_BLOCK_MAJOR) goto not_an_MTD_device; - return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super, - mnt); + return mount_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super); not_an_MTD_device: #endif /* CONFIG_BLOCK */ @@ -212,10 +202,10 @@ not_an_MTD_device: printk(KERN_NOTICE "MTD: Attempt to mount non-MTD device \"%s\"\n", dev_name); - return -EINVAL; + return ERR_PTR(-EINVAL); } -EXPORT_SYMBOL_GPL(get_sb_mtd); +EXPORT_SYMBOL_GPL(mount_mtd); /* * destroy an MTD-based superblock diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d1ae5dfc22b9..c86041b866a4 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) return ret; } -static int jffs2_get_sb(struct file_system_type *fs_type, +static struct dentry *jffs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, - mnt); + return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super); } static void jffs2_put_super (struct super_block *sb) @@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb) static struct file_system_type jffs2_fs_type = { .owner = THIS_MODULE, .name = "jffs2", - .get_sb = jffs2_get_sb, + .mount = jffs2_mount, .kill_sb = jffs2_kill_sb, }; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 268580535c92..6647f90e55cd 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -552,20 +552,19 @@ error_rsb: /* * get a superblock for mounting */ -static int romfs_get_sb(struct file_system_type *fs_type, +static struct dentry *romfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - int ret = -EINVAL; + struct dentry *ret = ERR_PTR(-EINVAL); #ifdef CONFIG_ROMFS_ON_MTD - ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, - mnt); + ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super); #endif #ifdef CONFIG_ROMFS_ON_BLOCK - if (ret == -EINVAL) - ret = get_sb_bdev(fs_type, flags, dev_name, data, - romfs_fill_super, mnt); + if (ret == ERR_PTR(-EINVAL)) + ret = mount_bdev(fs_type, flags, dev_name, data, + romfs_fill_super); #endif return ret; } @@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb) static struct file_system_type romfs_fs_type = { .owner = THIS_MODULE, .name = "romfs", - .get_sb = romfs_get_sb, + .mount = romfs_mount, .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/include/linux/mtd/super.h b/include/linux/mtd/super.h index 4016dd6fe336..f456230f9330 100644 --- a/include/linux/mtd/super.h +++ b/include/linux/mtd/super.h @@ -18,10 +18,9 @@ #include <linux/fs.h> #include <linux/mount.h> -extern int get_sb_mtd(struct file_system_type *fs_type, int flags, +extern struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); + int (*fill_super)(struct super_block *, void *, int)); extern void kill_mtd_super(struct super_block *sb); -- cgit v1.2.3
drivers/usb/gadget/inode.c | 10 +++++----- drivers/xen/xenfs/super.c | 8 ++++---- fs/binfmt_misc.c | 8 ++++---- fs/configfs/mount.c | 8 ++++---- fs/debugfs/inode.c | 8 ++++---- fs/devpts/inode.c | 32 +++++++++++++++---------------- fs/fuse/control.c | 10 ++++------ fs/nfsd/nfsctl.c | 8 ++++---- fs/openpromfs/inode.c | 8 ++++---- fs/super.c | 25 ++++++++++++++++++------ include/linux/fs.h | 3 +++ net/sunrpc/rpc_pipe.c | 18 ++++++++--------- security/inode.c | 8 ++++---- security/selinux/selinuxfs.c | 9 ++++----- security/smack/smackfs.c | 12 +++++------- 25 files changed, 147 insertions(+), 139 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5dec408d6703..3532b92de983 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -798,17 +798,17 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) return spufs_create_root(sb, data); } -static int -spufs_get_sb(struct file_system_type *fstype, int flags, - const char *name, void *data, struct vfsmount *mnt) +static struct dentry * +spufs_mount(struct file_system_type *fstype, int flags, + const char *name, void *data) { - return get_sb_single(fstype, flags, data, spufs_fill_super, mnt); + return mount_single(fstype, flags, data, spufs_fill_super); } static struct file_system_type spufs_type = { .owner = THIS_MODULE, .name = "spufs", - .get_sb = spufs_get_sb, + .mount = spufs_mount, .kill_sb = kill_litter_super, }; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 74d98670be27..47cc446dab8f 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -316,10 +316,10 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int hypfs_get_super(struct file_system_type *fst, int flags, - const char *devname, void *data, struct vfsmount *mnt) +static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, + const char *devname, void *data) { - return get_sb_single(fst, flags, data, hypfs_fill_super, mnt); + return mount_single(fst, flags, data, hypfs_fill_super); } static void hypfs_kill_super(struct super_block *sb) @@ -455,7 +455,7 @@ static const struct file_operations hypfs_file_ops = { static struct file_system_type hypfs_type = { .owner = THIS_MODULE, .name = "s390_hypfs", - .get_sb = hypfs_get_super, + .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index af0600143d1c..82bbb5967aa9 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -29,33 +29,33 @@ static struct vfsmount *dev_mnt; #if defined CONFIG_DEVTMPFS_MOUNT -static int dev_mount = 1; +static int mount_dev = 1; #else -static int dev_mount; +static int mount_dev; #endif static DEFINE_MUTEX(dirlock); static int __init mount_param(char *str) { - dev_mount = simple_strtoul(str, NULL, 0); + mount_dev = simple_strtoul(str, NULL, 0); return 1; } __setup("devtmpfs.mount=", mount_param); -static int dev_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *dev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { #ifdef CONFIG_TMPFS - return get_sb_single(fs_type, flags, data, shmem_fill_super, mnt); + return mount_single(fs_type, flags, data, shmem_fill_super); #else - return get_sb_single(fs_type, flags, data, ramfs_fill_super, mnt); + return mount_single(fs_type, 
flags, data, ramfs_fill_super); #endif } static struct file_system_type dev_fs_type = { .name = "devtmpfs", - .get_sb = dev_get_sb, + .mount = dev_mount, .kill_sb = kill_litter_super, }; @@ -351,7 +351,7 @@ int devtmpfs_mount(const char *mntdir) { int err; - if (!dev_mount) + if (!mount_dev) return 0; if (!dev_mnt) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 12d5bf76302c..8c8afc716b98 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -362,13 +362,13 @@ bail: return ret; } -static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - ipathfs_fill_super, mnt); - if (ret >= 0) - ipath_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; return ret; } @@ -411,7 +411,7 @@ bail: static struct file_system_type ipathfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = ipathfs_get_sb, + .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 7e433d75c775..f99bddc01716 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -555,13 +555,13 @@ bail: return ret; } -static int qibfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - qibfs_fill_super, mnt); - if (ret >= 0) - qib_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; return ret; } @@ -603,7 +603,7 @@ int qibfs_remove(struct qib_devdata *dd) static struct file_system_type qibfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = qibfs_get_sb, + .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 2b83850997c3..b4faed7fe0d3 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -125,16 +125,16 @@ fail: return -ENOMEM; } -static int capifs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *capifs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, capifs_fill_super, mnt); + return mount_single(fs_type, flags, data, capifs_fill_super); } static struct file_system_type capifs_fs_type = { .owner = THIS_MODULE, .name = "capifs", - .get_sb = capifs_get_sb, + .mount = capifs_mount, .kill_sb = kill_anon_super, }; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 0a53500636c9..d2d5d23416dd 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -91,11 +91,10 @@ static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root); static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); -static int ibmasmfs_get_super(struct file_system_type *fst, - int flags, const char *name, void *data, - 
struct vfsmount *mnt) +static struct dentry *ibmasmfs_mount(struct file_system_type *fst, + int flags, const char *name, void *data) { - return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt); + return mount_single(fst, flags, data, ibmasmfs_fill_super); } static const struct super_operations ibmasmfs_s_ops = { @@ -108,7 +107,7 @@ static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; static struct file_system_type ibmasmfs_type = { .owner = THIS_MODULE, .name = "ibmasmfs", - .get_sb = ibmasmfs_get_super, + .mount = ibmasmfs_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 449de59bf35b..e9ff6f7770be 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -259,17 +259,17 @@ static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) } -static int oprofilefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *oprofilefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt); + return mount_single(fs_type, flags, data, oprofilefs_fill_super); } static struct file_system_type oprofilefs_type = { .owner = THIS_MODULE, .name = "oprofilefs", - .get_sb = oprofilefs_get_sb, + .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e2f63c0ea09d..9819a4cc3b26 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -574,16 +574,16 @@ static void fs_remove_file (struct dentry *dentry) /* --------------------------------------------------------------------- */ -static int usb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *usb_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, usbfs_fill_super, mnt); + return mount_single(fs_type, flags, data, usbfs_fill_super); } static struct file_system_type usb_fs_type = { .owner = THIS_MODULE, .name = "usbfs", - .get_sb = usb_get_sb, + .mount = usb_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index f276e9594f00..4a830df4fc31 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1176,9 +1176,9 @@ invalid: /* "mount -t functionfs dev_name /dev/function" ends up here */ -static int -ffs_fs_get_sb(struct file_system_type *t, int flags, - const char *dev_name, void *opts, struct vfsmount *mnt) +static struct dentry * +ffs_fs_mount(struct file_system_type *t, int flags, + const char *dev_name, void *opts) { struct ffs_sb_fill_data data = { .perms = { @@ -1194,14 +1194,14 @@ ffs_fs_get_sb(struct file_system_type *t, int flags, ret = functionfs_check_dev_callback(dev_name); if (unlikely(ret < 0)) - return ret; + return ERR_PTR(ret); ret = ffs_fs_parse_opts(&data, opts); if (unlikely(ret < 0)) - return ret; + return ERR_PTR(ret); data.dev_name = dev_name; - return get_sb_single(t, flags, &data, ffs_sb_fill, mnt); + return mount_single(t, flags, &data, ffs_sb_fill); } static void @@ -1220,7 +1220,7 @@ ffs_fs_kill_sb(struct super_block *sb) static struct file_system_type ffs_fs_type = { .owner = THIS_MODULE, .name = "functionfs", - .get_sb = ffs_fs_get_sb, + .mount = ffs_fs_mount, .kill_sb = 
ffs_fs_kill_sb, }; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index ba145e7fbe03..3ed73f49cf18 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2097,11 +2097,11 @@ enomem0: } /* "mount -t gadgetfs path /dev/gadget" ends up here */ -static int -gadgetfs_get_sb (struct file_system_type *t, int flags, - const char *path, void *opts, struct vfsmount *mnt) +static struct dentry * +gadgetfs_mount (struct file_system_type *t, int flags, + const char *path, void *opts) { - return get_sb_single (t, flags, opts, gadgetfs_fill_super, mnt); + return mount_single (t, flags, opts, gadgetfs_fill_super); } static void @@ -2119,7 +2119,7 @@ gadgetfs_kill_sb (struct super_block *sb) static struct file_system_type gadgetfs_type = { .owner = THIS_MODULE, .name = shortname, - .get_sb = gadgetfs_get_sb, + .mount = gadgetfs_mount, .kill_sb = gadgetfs_kill_sb, }; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index d6662b789b6b..f6339d11d59c 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -121,17 +121,17 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) return rc; } -static int xenfs_get_sb(struct file_system_type *fs_type, +static struct dentry *xenfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt); + return mount_single(fs_type, flags, data, xenfs_fill_super); } static struct file_system_type xenfs_type = { .owner = THIS_MODULE, .name = "xenfs", - .get_sb = xenfs_get_sb, + .mount = xenfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 29990f0eee0c..1befe2ec8186 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent) return err; } -static int bm_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bm_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); + return mount_single(fs_type, flags, data, bm_fill_super); } static struct linux_binfmt misc_format = { @@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = { static struct file_system_type bm_fs_type = { .owner = THIS_MODULE, .name = "binfmt_misc", - .get_sb = bm_get_sb, + .mount = bm_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 8c8d64230c2d..7d3607febe1c 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int configfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *configfs_do_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); + return mount_single(fs_type, flags, data, configfs_fill_super); } static struct file_system_type configfs_fs_type = { .owner = THIS_MODULE, .name = "configfs", - .get_sb = configfs_get_sb, + .mount = configfs_do_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a4ed8380e98a..37a8ca7c1222 100644 --- a/fs/debugfs/inode.c +++
b/fs/debugfs/inode.c @@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static int debug_get_sb(struct file_system_type *fs_type, +static struct dentry *debug_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); + return mount_single(fs_type, flags, data, debug_fill_super); } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", - .get_sb = debug_get_sb, + .mount = debug_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 8b3ffd5b5235..1bb547c9cad6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p) } /* - * devpts_get_sb() + * devpts_mount() * * If the '-o newinstance' mount option was specified, mount a new * (private) instance of devpts. PTYs created in this instance are @@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p) * semantics in devpts while preserving backward compatibility of the * current 'single-namespace' semantics. i.e all mounts of devpts * without the 'newinstance' mount option should bind to the initial - * kernel mount, like get_sb_single(). + * kernel mount, like mount_single(). * * Mounts with 'newinstance' option create a new, private namespace. * * NOTE: * - * For single-mount semantics, devpts cannot use get_sb_single(), - * because get_sb_single()/sget() find and use the super-block from + * For single-mount semantics, devpts cannot use mount_single(), + * because mount_single()/sget() find and use the super-block from * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and get_sb_single() would pick the newinstance + * 'newinstance' mount and mount_single() would pick the newinstance * super-block instead of the initial super-block. 
*/ -static int devpts_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *devpts_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { int error; struct pts_mount_opts opts; @@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type, error = parse_mount_options(data, PARSE_MOUNT, &opts); if (error) - return error; + return ERR_PTR(error); if (opts.newinstance) s = sget(fs_type, NULL, set_anon_super, NULL); @@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type, s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); if (!s->s_root) { s->s_flags = flags; @@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type, if (error) goto out_undo_sget; - simple_set_mnt(mnt, s); - - return 0; + return dget(s->s_root); out_undo_sget: deactivate_locked_super(s); - return error; + return ERR_PTR(error); } #else @@ -404,10 +402,10 @@ out_undo_sget: * This supports only the legacy single-instance semantics (no * multiple-instance semantics) */ -static int devpts_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); + return mount_single(fs_type, flags, data, devpts_fill_super); } #endif @@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb) static struct file_system_type devpts_fs_type = { .name = "devpts", - .get_sb = devpts_get_sb, + .mount = devpts_mount, .kill_sb = devpts_kill_sb, }; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 4eba07661e5c..85542a7daf40 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) { - return get_sb_single(fs_type, flags, raw_data, - fuse_ctl_fill_super, mnt); + return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super); } static void fuse_ctl_kill_sb(struct super_block *sb) @@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) static struct file_system_type fuse_ctl_fs_type = { .owner = THIS_MODULE, .name = "fusectl", - .get_sb = fuse_ctl_get_sb, + .mount = fuse_ctl_mount, .kill_sb = fuse_ctl_kill_sb, }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d6dc3f61f8ba..4514ebbee4d6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) return simple_fill_super(sb, 0x6e667364, nfsd_files); } -static int nfsd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *nfsd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); + return mount_single(fs_type, flags, data, nfsd_fill_super); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", - .get_sb = nfsd_get_sb, + 
.mount = nfsd_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ffcd04f0012c..ddb1f41376e5 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -415,16 +415,16 @@ out_no_root: return ret; } -static int openprom_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *openprom_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); + return mount_single(fs_type, flags, data, openprom_fill_super); } static struct file_system_type openprom_fs_type = { .owner = THIS_MODULE, .name = "openpromfs", - .get_sb = openprom_get_sb, + .mount = openprom_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/super.c b/fs/super.c index 40989e9a2606..6f021a171ac6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -900,29 +900,42 @@ static int compare_single(struct super_block *s, void *p) return 1; } -int get_sb_single(struct file_system_type *fs_type, +struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); if (!s->s_root) { s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - return error; + return ERR_PTR(error); } s->s_flags |= MS_ACTIVE; } else { do_remount_sb(s, flags, data, 0); } - simple_set_mnt(mnt, s); + return dget(s->s_root); +} +EXPORT_SYMBOL(mount_single); + +int get_sb_single(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + root = mount_single(fs_type, flags, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2fab5a24ca51..0aa2f1202afa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,6 +1799,9 @@ extern int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_single(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7df92d237cb8..10a17a37ec4e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -28,7 +28,7 @@ #include #include -static struct vfsmount *rpc_mount __read_mostly; +static struct vfsmount *rpc_mnt __read_mostly; static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; @@ -417,16 +417,16 @@ struct vfsmount *rpc_get_mount(void) { int err; - err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count); if (err != 0) return ERR_PTR(err); - return rpc_mount; + return rpc_mnt; } EXPORT_SYMBOL_GPL(rpc_get_mount); void rpc_put_mount(void) { - simple_release_fs(&rpc_mount, &rpc_mount_count); +
simple_release_fs(&rpc_mnt, &rpc_mount_count); } EXPORT_SYMBOL_GPL(rpc_put_mount); @@ -1018,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int -rpc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry * +rpc_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); + return mount_single(fs_type, flags, data, rpc_fill_super); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", - .get_sb = rpc_get_sb, + .mount = rpc_mount, .kill_sb = kill_litter_super, }; diff --git a/security/inode.c b/security/inode.c index cb8f47c66a58..c4df2fbebe6b 100644 --- a/security/inode.c +++ b/security/inode.c @@ -131,17 +131,17 @@ static int fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, SECURITYFS_MAGIC, files); } -static int get_sb(struct file_system_type *fs_type, +static struct dentry *get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, fill_super, mnt); + return mount_single(fs_type, flags, data, fill_super); } static struct file_system_type fs_type = { .owner = THIS_MODULE, .name = "securityfs", - .get_sb = get_sb, + .mount = get_sb, .kill_sb = kill_litter_super, }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 55a755c1a1bd..073fd5b0a53a 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1909,16 +1909,15 @@ err: goto out; } -static int sel_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *sel_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, sel_fill_super, mnt); + return mount_single(fs_type, flags, data, sel_fill_super); } static struct file_system_type sel_fs_type = { .name = "selinuxfs", - .get_sb = sel_get_sb, + .mount = sel_mount, .kill_sb = kill_litter_super, }; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 7512502d0162..dc1fd6239f24 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -1310,27 +1310,25 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) } /** - * smk_get_sb - get the smackfs superblock + * smk_mount - get the smackfs superblock * @fs_type: passed along without comment * @flags: passed along without comment * @dev_name: passed along without comment * @data: passed along without comment - * @mnt: passed along without comment * * Just passes everything along. * * Returns what the lower level code does. 
*/ -static int smk_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *smk_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, smk_fill_super, mnt); + return mount_single(fs_type, flags, data, smk_fill_super); } static struct file_system_type smk_fs_type = { .name = "smackfs", - .get_sb = smk_get_sb, + .mount = smk_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 3c26ff6e499ee7e6f9f2bc7da5f2f30d80862ecf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 11:46:36 +0400 Subject: convert get_sb_nodev() users Signed-off-by: Al Viro --- drivers/staging/autofs/init.c | 8 ++++---- drivers/staging/pohmelfs/inode.c | 9 ++++----- drivers/staging/smbfs/inode.c | 8 ++++---- fs/autofs4/init.c | 8 ++++---- fs/coda/inode.c | 8 ++++---- fs/exofs/super.c | 10 +++++----- fs/fuse/inode.c | 8 ++++---- fs/hostfs/hostfs_kern.c | 8 ++++---- fs/hppfs/hppfs.c | 8 ++++---- fs/hugetlbfs/inode.c | 8 ++++---- fs/ncpfs/inode.c | 8 ++++---- fs/ocfs2/dlmfs/dlmfs.c | 8 ++++---- fs/ramfs/inode.c | 17 ++++++++--------- fs/super.c | 27 ++++++++++++++++++++------- include/linux/fs.h | 3 +++ include/linux/ramfs.h | 4 ++-- mm/shmem.c | 10 +++++----- 17 files changed, 87 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/staging/autofs/init.c b/drivers/staging/autofs/init.c index 765c72f42976..5e4b372ea663 100644 --- a/drivers/staging/autofs/init.c +++ b/drivers/staging/autofs/init.c @@ -14,16 +14,16 @@ #include #include "autofs_i.h" -static int autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *autofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, autofs_fill_super); } static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", - .get_sb = autofs_get_sb, + .mount = autofs_mount, .kill_sb = autofs_kill_sb, }; diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index c62d30017c07..61685ccceda8 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -1937,11 +1937,10 @@ err_out_exit: /* * Some VFS magic here... 
*/ -static int pohmelfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *pohmelfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, pohmelfs_fill_super, - mnt); + return mount_nodev(fs_type, flags, data, pohmelfs_fill_super); } /* @@ -1958,7 +1957,7 @@ static void pohmelfs_kill_super(struct super_block *sb) static struct file_system_type pohmel_fs_type = { .owner = THIS_MODULE, .name = "pohmel", - .get_sb = pohmelfs_get_sb, + .mount = pohmelfs_mount, .kill_sb = pohmelfs_kill_super, }; diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c index 552951aa7498..f9c493591ce7 100644 --- a/drivers/staging/smbfs/inode.c +++ b/drivers/staging/smbfs/inode.c @@ -793,16 +793,16 @@ out: return error; } -static int smb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *smb_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt); + return mount_nodev(fs_type, flags, data, smb_fill_super); } static struct file_system_type smb_fs_type = { .owner = THIS_MODULE, .name = "smbfs", - .get_sb = smb_get_sb, + .mount = smb_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 9722e4bd8957..c038727b4050 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -14,16 +14,16 @@ #include #include "autofs_i.h" -static int autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *autofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); + return mount_nodev(fs_type, flags, data, autofs4_fill_super); } static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", - .get_sb = autofs_get_sb, + .mount = autofs_mount, .kill_sb = autofs4_kill_sb, }; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 7993b96ca348..5ea57c8c7f97 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) /* init_coda: used by filesystems.c to register coda */ -static int coda_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *coda_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); + return mount_nodev(fs_type, flags, data, coda_fill_super); } struct file_system_type coda_fs_type = { .owner = THIS_MODULE, .name = "coda", - .get_sb = coda_get_sb, + .mount = coda_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 047e92fa3af8..79c3ae6e0456 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -659,19 +659,19 @@ free_bdi: /* * Set up the superblock (calls exofs_fill_super eventually) */ -static int exofs_get_sb(struct file_system_type *type, +static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { struct exofs_mountopt opts; int ret; ret = parse_options(data, &opts); if 
(ret) - return ret; + return ERR_PTR(ret); opts.dev_name = dev_name; - return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); + return mount_nodev(type, flags, &opts, exofs_fill_super); } /* @@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = { static struct file_system_type exofs_type = { .owner = THIS_MODULE, .name = "exofs", - .get_sb = exofs_get_sb, + .mount = exofs_mount, .kill_sb = generic_shutdown_super, }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index edf6a1843533..cfce3ad86a92 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int fuse_get_sb(struct file_system_type *fs_type, +static struct dentry *fuse_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data, struct vfsmount *mnt) + void *raw_data) { - return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); + return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); } static void fuse_kill_sb_anon(struct super_block *sb) @@ -1065,7 +1065,7 @@ static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, - .get_sb = fuse_get_sb, + .mount = fuse_mount, .kill_sb = fuse_kill_sb_anon, }; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index cd7c93917cc7..2c0f148a49e6 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -962,11 +962,11 @@ out: return err; } -static int hostfs_read_sb(struct file_system_type *type, +static struct dentry *hostfs_read_sb(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); + return mount_nodev(type, flags, data, hostfs_fill_sb_common); } static void hostfs_kill_sb(struct super_block *s) @@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s) static struct file_system_type hostfs_type = { .owner = THIS_MODULE, .name = "hostfs", - .get_sb = hostfs_read_sb, + .mount = hostfs_read_sb, .kill_sb = hostfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 4e2a45ea6140..f702b5f713fc 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) return(err); } -static int hppfs_read_super(struct file_system_type *type, +static struct dentry *hppfs_read_super(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); + return mount_nodev(type, flags, data, hppfs_fill_super); } static struct file_system_type hppfs_type = { .owner = THIS_MODULE, .name = "hppfs", - .get_sb = hppfs_read_super, + .mount = hppfs_read_super, .kill_sb = kill_anon_super, .fs_flags = 0, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b14be3f781c7..d6cfac1f0a40 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta) } } -static int hugetlbfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); + return 
mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); } static struct file_system_type hugetlbfs_fs_type = { .name = "hugetlbfs", - .get_sb = hugetlbfs_get_sb, + .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 985fabb26aca..d290545aa0c4 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -1020,16 +1020,16 @@ out: return result; } -static int ncp_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ncp_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); + return mount_nodev(fs_type, flags, data, ncp_fill_super); } static struct file_system_type ncp_fs_type = { .owner = THIS_MODULE, .name = "ncpfs", - .get_sb = ncp_get_sb, + .mount = ncp_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 75e115f1bd73..b2df490a19ed 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = { .setattr = dlmfs_file_setattr, }; -static int dlmfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *dlmfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, dlmfs_fill_super); } static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", - .get_sb = dlmfs_get_sb, + .mount = dlmfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 67fadb1ad2c1..eacb166fb259 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -255,17 +255,16 @@ fail: return err; } -int ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, ramfs_fill_super); } -static int rootfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *rootfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, - mnt); + return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); } static void ramfs_kill_sb(struct super_block *sb) @@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb) static struct file_system_type ramfs_fs_type = { .name = "ramfs", - .get_sb = ramfs_get_sb, + .mount = ramfs_mount, .kill_sb = ramfs_kill_sb, }; static struct file_system_type rootfs_fs_type = { .name = "rootfs", - .get_sb = rootfs_get_sb, + .mount = rootfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/super.c b/fs/super.c index 6f021a171ac6..f6a7bf1fff2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -870,29 +870,42 @@ void kill_block_super(struct super_block *sb) EXPORT_SYMBOL(kill_block_super); #endif -int get_sb_nodev(struct file_system_type *fs_type, +struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, - int 
(*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - return error; + return ERR_PTR(error); } s->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); } +EXPORT_SYMBOL(mount_nodev); + +int get_sb_nodev(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + root = mount_nodev(fs_type, flags, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + return 0; +} EXPORT_SYMBOL(get_sb_nodev); static int compare_single(struct super_block *s, void *p) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0aa2f1202afa..4c3a29ddcacb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1806,6 +1806,9 @@ extern int get_sb_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_nodev(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index e7320b5e82fb..3a8f0c9b2933 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -3,8 +3,8 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, int mode, dev_t dev); -extern int ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt); +extern struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data); #ifndef CONFIG_MMU extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); diff --git a/mm/shmem.c b/mm/shmem.c index f6d350e8adc5..47fdeeb9d636 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2538,16 +2538,16 @@ static const struct vm_operations_struct shmem_vm_ops = { }; -static int shmem_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *shmem_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); + return mount_nodev(fs_type, flags, data, shmem_fill_super); } static struct file_system_type tmpfs_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", - .get_sb = shmem_get_sb, + .mount = shmem_mount, .kill_sb = kill_litter_super, }; @@ -2643,7 +2643,7 @@ out: static struct file_system_type tmpfs_fs_type = { .name = "tmpfs", - .get_sb = ramfs_get_sb, + .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 51139adac92f7160ad3ca1cab2de1b4b8d19dc96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 23:47:46 +0400 Subject: convert get_sb_pseudo() users Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 9 ++++----- drivers/mtd/mtdchar.c | 10 ++++------ fs/anon_inodes.c | 10 ++++------ fs/block_dev.c | 8 ++++---- fs/libfs.c | 14 ++++++-------- fs/pipe.c | 9 ++++----- include/linux/fs.h | 
5 ++--- net/socket.c | 10 ++++------ 8 files changed, 32 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 6b1852f7f972..39e534f5a3b0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -618,16 +618,15 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry * +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); } static struct file_system_type pfm_fs_type = { .name = "pfmfs", - .get_sb = pfmfs_get_sb, + .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 5ef45487b65f..a34a0fe14884 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1030,17 +1030,15 @@ static const struct file_operations mtd_fops = { #endif }; -static int mtd_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC); } static struct file_system_type mtd_inodefs_type = { .name = "mtd_inodefs", - .get_sb = mtd_inodefs_get_sb, + .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 5365527ca43f..57ce55b2564c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; static const struct file_operations anon_inode_fops; -static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC); } /* @@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", - .get_sb = anon_inodefs_get_sb, + .mount = anon_inodefs_mount, .kill_sb = kill_anon_super, }; static const struct dentry_operations anon_inodefs_dentry_operations = { diff --git a/fs/block_dev.c b/fs/block_dev.c index dea3b628a6ce..06e8ff12b97c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = { .evict_inode = bdev_evict_inode, }; -static int bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); + return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); } static struct file_system_type bd_type = { .name = "bdev", - .get_sb = bd_get_sb, + .mount = bd_mount, .kill_sb = kill_anon_super, }; diff 
--git a/fs/libfs.c b/fs/libfs.c index 304a5132ca27..a3accdf528ad 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -int get_sb_pseudo(struct file_system_type *fs_type, char *name, - const struct super_operations *ops, unsigned long magic, - struct vfsmount *mnt) +struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, + const struct super_operations *ops, unsigned long magic) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); struct dentry *dentry; @@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, struct qstr d_name = {.name = name, .len = strlen(name)}; if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; @@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); Enomem: deactivate_locked_super(s); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(generic_read_dir); -EXPORT_SYMBOL(get_sb_pseudo); +EXPORT_SYMBOL(mount_pseudo); EXPORT_SYMBOL(simple_write_begin); EXPORT_SYMBOL(simple_write_end); EXPORT_SYMBOL(simple_dir_inode_operations); diff --git a/fs/pipe.c b/fs/pipe.c index d2d7566ce68e..a8012a955720 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1247,16 +1247,15 @@ out: * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. 
*/ -static int pipefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *pipefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { .name = "pipefs", - .get_sb = pipefs_get_sb, + .mount = pipefs_mount, .kill_sb = kill_anon_super, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c3a29ddcacb..43e6cfb5cbb3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1824,9 +1824,8 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), void *data); -extern int get_sb_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, unsigned long, - struct vfsmount *mnt); +extern struct dentry *mount_pseudo(struct file_system_type *, char *, + const struct super_operations *ops, unsigned long); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); static inline void sb_mark_dirty(struct super_block *sb) diff --git a/net/socket.c b/net/socket.c index ee3cd280c76e..5247ae10f374 100644 --- a/net/socket.c +++ b/net/socket.c @@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); + return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } static struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", - .get_sb = sockfs_get_sb, + .mount = sockfs_mount, .kill_sb = kill_anon_super, }; -- cgit v1.2.3 From ceefda6931806972ecf550bd8231dce4a4178953 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Jul 2010 13:16:50 +0400 Subject: switch get_sb_ns() users Signed-off-by: Al Viro --- fs/super.c | 14 ++++++-------- include/linux/fs.h | 5 ++--- ipc/mqueue.c | 8 ++++---- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index f6a7bf1fff2b..ca696155cd9a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data) return set_anon_super(sb, NULL); } -int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) +struct dentry *mount_ns(struct file_system_type *fs_type, int flags, + void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *sb; sb = sget(fs_type, ns_test_super, ns_set_super, data); if (IS_ERR(sb)) - return PTR_ERR(sb); + return ERR_CAST(sb); if (!sb->s_root) { int err; @@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, err = fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); if (err) { deactivate_locked_super(sb); - return err; + return ERR_PTR(err); } sb->s_flags |= MS_ACTIVE; } - simple_set_mnt(mnt, sb); - return 0; + return dget(sb->s_root); } -EXPORT_SYMBOL(get_sb_ns); +EXPORT_SYMBOL(mount_ns); #ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) diff --git a/include/linux/fs.h b/include/linux/fs.h index 43e6cfb5cbb3..4d07902bc50c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1789,9 +1789,8 @@ struct file_system_type { struct lock_class_key i_alloc_sem_key; }; -extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); +extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, + void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3a61ffefe884..035f4399edbc 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -211,13 +211,13 @@ out: return error; } -static int mqueue_get_sb(struct file_system_type *fs_type, +static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { if (!(flags & MS_KERNMOUNT)) data = current->nsproxy->ipc_ns; - return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt); + return mount_ns(fs_type, flags, data, mqueue_fill_super); } static void init_once(void *foo) @@ -1232,7 +1232,7 @@ static const struct super_operations mqueue_super_ops = { static struct file_system_type mqueue_fs_type = { .name = "mqueue", - .get_sb = mqueue_get_sb, + .mount = mqueue_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 3259f8bed2f0f57c2fdcdac1b510c3fa319ef97e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Oct 2010 11:16:17 -0400 Subject: Add new functions for triggering inode writeback When btrfs is running low on metadata space, it needs to force delayed allocation pages to disk. It currently does this with a suboptimal walk of a private list of inodes with delayed allocation, and it would be much better if we used the generic flusher threads. writeback_inodes_sb_if_idle would be ideal, but it waits for the flusher thread to start IO on all the dirty pages in the FS before it returns. This adds variants of writeback_inodes_sb* that allow the caller to control how many pages get sent down. Signed-off-by: Chris Mason --- fs/fs-writeback.c | 52 ++++++++++++++++++++++++++++++++++++++--------- include/linux/writeback.h | 2 ++ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ab38fef1c9a1..1e23c33ea5cf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1069,33 +1069,44 @@ static void wait_sb_inodes(struct super_block *sb) } /** - * writeback_inodes_sb - writeback dirty inodes from given super_block + * writeback_inodes_sb_nr - writeback dirty inodes from given super_block * @sb: the superblock + * @nr: the number of pages to write * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait - * for IO completion of submitted IO. The number of pages submitted is - * returned. + * for IO completion of submitted IO. 
*/ -void writeback_inodes_sb(struct super_block *sb) +void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, .done = &done, + .nr_pages = nr, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - - work.nr_pages = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); } +EXPORT_SYMBOL(writeback_inodes_sb_nr); + +/** + * writeback_inodes_sb - writeback dirty inodes from given super_block + * @sb: the superblock + * + * Start writeback on some inodes on this super_block. No guarantees are made + * on how many (if any) will be written, and this function does not wait + * for IO completion of submitted IO. + */ +void writeback_inodes_sb(struct super_block *sb) +{ + return writeback_inodes_sb_nr(sb, global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) + + (inodes_stat.nr_inodes - inodes_stat.nr_unused)); +} EXPORT_SYMBOL(writeback_inodes_sb); /** @@ -1117,6 +1128,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +/** + * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * @sb: the superblock + * @nr: the number of pages to write + * + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_nr_if_idle(struct super_block *sb, + unsigned long nr) +{ + if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); + writeback_inodes_sb_nr(sb, nr); + up_read(&sb->s_umount); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..a4cf84511e79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,9 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); int writeback_inodes_sb_if_idle(struct super_block *); +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); void sync_inodes_sb(struct super_block *); void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc); -- cgit v1.2.3 From 435f49a518c78eec8e2edbbadd912737246cbe20 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Oct 2010 10:36:49 -0700 Subject: readv/writev: do the same MAX_RW_COUNT truncation that read/write does We used to protect against overflow, but rather than return an error, do what read/write does, namely to limit the total size to MAX_RW_COUNT. This is not only more consistent, but it also means that any broken low-level read/write routine that still keeps counts in 'int' can't break.
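The new per-segment clamp is easy to demonstrate outside the kernel. A minimal userspace sketch of the arithmetic (MAX_RW_COUNT as defined in the patch, with an assumed 4K page size; cap_iov_total() is an illustrative name, not a kernel function):

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <sys/uio.h>

    #define PAGE_CACHE_MASK (~4095L)                  /* assumes 4K pages */
    #define MAX_RW_COUNT    (INT_MAX & PAGE_CACHE_MASK)

    /* Mimic the patched rw_copy_check_uvector() arithmetic: reject negative
     * lengths, shrink the segment that crosses MAX_RW_COUNT, return the total. */
    static ssize_t cap_iov_total(struct iovec *iov, unsigned long nr_segs)
    {
            ssize_t ret = 0;
            unsigned long seg;

            for (seg = 0; seg < nr_segs; seg++) {
                    ssize_t len = (ssize_t)iov[seg].iov_len;

                    if (len < 0)
                            return -EINVAL;
                    if (len > MAX_RW_COUNT - ret) {
                            len = MAX_RW_COUNT - ret;
                            iov[seg].iov_len = len;   /* truncate instead of failing */
                    }
                    ret += len;
            }
            return ret;
    }

    int main(void)
    {
            char a, b;
            struct iovec iov[2] = {
                    { .iov_base = &a, .iov_len = MAX_RW_COUNT },
                    { .iov_base = &b, .iov_len = 100 },
            };

            printf("capped total: %zd (MAX_RW_COUNT = %ld)\n",
                   cap_iov_total(iov, 2), (long)MAX_RW_COUNT);
            return 0;
    }

On a 32-bit box the old "ret + len < ret" test would have failed this pair of segments with -EINVAL, and on 64-bit it would have let a total above INT_MAX through; capping the total keeps readv/writev consistent with plain read/write.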
Signed-off-by: Linus Torvalds --- fs/compat.c | 12 +++++------ fs/read_write.c | 62 +++++++++++++++++++++++++++++------------------------- include/linux/fs.h | 1 + 3 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 52cfeb61da77..ff66c0d7583d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -606,14 +606,14 @@ ssize_t compat_rw_copy_check_uvector(int type, /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t + * ssize_t. * - * Be careful here because iov_len is a size_t not an ssize_t + * In Linux, the total length is limited to MAX_RW_COUNT, there is + * no overflow possibility. */ tot_len = 0; ret = -EINVAL; for (seg = 0; seg < nr_segs; seg++) { - compat_ssize_t tmp = tot_len; compat_uptr_t buf; compat_ssize_t len; @@ -624,13 +624,13 @@ ssize_t compat_rw_copy_check_uvector(int type, } if (len < 0) /* size_t not fitting in compat_ssize_t .. */ goto out; - tot_len += len; - if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ - goto out; if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { ret = -EFAULT; goto out; } + if (len > MAX_RW_COUNT - tot_len) + len = MAX_RW_COUNT - tot_len; + tot_len += len; iov->iov_base = compat_ptr(buf); iov->iov_len = (compat_size_t) len; uvector++; diff --git a/fs/read_write.c b/fs/read_write.c index 9cd9d148105d..431a0ed610c8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -243,8 +243,6 @@ bad: * them to something that fits in "int" so that others * won't have to do range checks all the time. */ -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) - int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) { struct inode *inode; @@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer) - { +{ unsigned long seg; - ssize_t ret; + ssize_t ret; struct iovec *iov = fast_pointer; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ if (nr_segs == 0) { ret = 0; - goto out; + goto out; } - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ if (nr_segs > UIO_MAXIOV) { ret = -EINVAL; - goto out; + goto out; } if (nr_segs > fast_segs) { - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); if (iov == NULL) { ret = -ENOMEM; - goto out; + goto out; } - } + } if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { ret = -EFAULT; - goto out; + goto out; } - /* + /* * According to the Single Unix Specification we should return EINVAL * if an element length is < 0 when cast to ssize_t or if the * total length would overflow the ssize_t return value of the * system call. - */ + * + * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the + * overflow case. 
+ */ ret = 0; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; /* see if we we're about to use an invalid len or if * it's about to overflow ssize_t */ - if (len < 0 || (ret + len < ret)) { + if (len < 0) { ret = -EINVAL; - goto out; + goto out; } if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { ret = -EFAULT; - goto out; + goto out; + } + if (len > MAX_RW_COUNT - ret) { + len = MAX_RW_COUNT - ret; + iov[seg].iov_len = len; } - ret += len; - } + } out: *ret_pointer = iov; return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4d07902bc50c..7b7b507ffa1c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1867,6 +1867,7 @@ extern int current_umask(void); /* /sys/fs */ extern struct kobject *fs_kobj; +#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) extern int rw_verify_area(int, struct file *, loff_t *, size_t); #define FLOCK_VERIFY_READ 1 -- cgit v1.2.3 From d7ba979d45272385ce0fdf141d922e61ff48e07b Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Wed, 18 Aug 2010 06:02:00 -0500 Subject: debug_core,x86,blackfin: Clean up hw debug disable API The kgdb_disable_hw_debug() was an architecture-specific function for disabling all hardware breakpoints on a per-cpu basis when entering the debug core. This patch will remove the weak function kgdb_disable_hw_debug() and change it into a callback which lives with the rest of the hw breakpoint callbacks in struct kgdb_arch. Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel --- arch/blackfin/kernel/kgdb.c | 3 ++- arch/x86/kernel/kgdb.c | 3 ++- include/linux/kgdb.h | 13 +++---------- kernel/debug/debug_core.c | 16 +++------------- 4 files changed, 10 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index 08bc44ea6883..edae461b1c54 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -320,7 +320,7 @@ static void bfin_correct_hw_break(void) } } -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void bfin_disable_hw_debug(struct pt_regs *regs) { /* Disable hardware debugging while we are in kgdb */ bfin_write_WPIACTL(0); @@ -406,6 +406,7 @@ struct kgdb_arch arch_kgdb_ops = { #endif .set_hw_breakpoint = bfin_set_hw_break, .remove_hw_breakpoint = bfin_remove_hw_break, + .disable_hw_break = bfin_disable_hw_debug, .remove_all_hw_break = bfin_remove_all_hw_break, .correct_hw_break = bfin_correct_hw_break, }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d81cfebb848f..ec592caac4b4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) * disable hardware debugging while it is processing gdb packets or * handling exception.
*/ -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void kgdb_disable_hw_debug(struct pt_regs *regs) { int i; int cpu = raw_smp_processor_id(); @@ -724,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = { .flags = KGDB_HW_BREAKPOINT, .set_hw_breakpoint = kgdb_set_hw_break, .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_debug, .remove_all_hw_break = kgdb_remove_all_hw_break, .correct_hw_break = kgdb_correct_hw_break, }; diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index cc96f0f23e04..092e4250a458 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -35,16 +35,6 @@ struct pt_regs; */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); -/** - * kgdb_disable_hw_debug - (optional) Disable hardware debugging hook - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -extern void kgdb_disable_hw_debug(struct pt_regs *regs); - struct tasklet_struct; struct task_struct; struct uart_port; @@ -243,6 +233,8 @@ extern void kgdb_arch_late(void); * breakpoint. * @remove_hw_breakpoint: Allow an architecture to specify how to remove a * hardware breakpoint. + * @disable_hw_break: Allow an architecture to specify how to disable + * hardware breakpoints for a single cpu. * @remove_all_hw_break: Allow an architecture to specify how to remove all * hardware breakpoints. * @correct_hw_break: Allow an architecture to specify how to correct the @@ -256,6 +248,7 @@ struct kgdb_arch { int (*remove_breakpoint)(unsigned long, char *); int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); + void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); }; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index fec596da9bd0..cefd4a11f6d9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) return 0; } -/** - * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -void __weak kgdb_disable_hw_debug(struct pt_regs *regs) -{ -} - /* * Some architectures need cache flushes when we set/clear a * breakpoint: @@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, atomic_inc(&masters_in_kgdb); else atomic_inc(&slaves_in_kgdb); - kgdb_disable_hw_debug(ks->linux_regs); + + if (arch_kgdb_ops.disable_hw_break) + arch_kgdb_ops.disable_hw_break(regs); acquirelock: /* -- cgit v1.2.3 From 45f81b1c96d9793e47ce925d257ea693ce0b193e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Oct 2010 12:33:43 -0400 Subject: jump label: Add work around to i386 gcc asm goto bug On i386 (not x86_64) early implementations of gcc would have a bug with asm goto causing it to produce code like the following: (This was noticed by Peter Zijlstra) 56 pushl 0 67 nopl jmp 0x6f popl jmp 0x8c 6f mov test je 0x8c 8c mov call *(%esp) The jump added in the asm goto skipped over the popl that matched the pushl 0, which led to a quick crash of the system when the jump was enabled.
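For reference, the miscompiled construct is the asm goto based static branch. A stripped-down sketch of the pattern (illustrative only: the kernel's real jump label macro also records a key pointer in each __jump_table entry, and it requires a gcc new enough to support asm goto):

    static inline __attribute__((always_inline)) int trace_enabled(void)
    {
            asm goto("1: nop\n\t"                 /* patched to "jmp %l[do_trace]" when enabled */
                     ".pushsection __jump_table, \"aw\"\n\t"
                     ".long 1b, %l[do_trace]\n\t" /* record nop address and jump target */
                     ".popsection"
                     : : : : do_trace);
            return 0;                             /* disabled: fall straight through */
    do_trace:
            return 1;                             /* reached only after the nop is patched */
    }

gcc is told that the asm may transfer control to do_trace; the i386 bug is that the compiler's stack bookkeeping around such a label can go wrong unless -maccumulate-outgoing-args is in effect, hence the Makefile change below.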
The nopl is defined in the asm goto () statement and when tracepoints are enabled, the nop changes to a jump to the label that was specified by the asm goto. asm goto is supposed to tell gcc that the code in the asm might jump to an external label. Here gcc obviously fails to make that work. The bug report for gcc is here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226 The bug only appears on x86 when not compiled with -maccumulate-outgoing-args. This option is always set on x86_64 and it is also the work around for a function graph tracer i386 bug. (See commit: 746357d6a526d6da9d89a2ec645b28406e959c2e) This explains why the bug only showed up on i386 when function graph tracer was not enabled. This patch now adds a CONFIG_JUMP_LABEL option that is default off instead of using jump labels by default. When jump labels are enabled, the -maccumulate-outgoing-args flag will be used (causing a slightly larger kernel image on i386). This option will exist until we have a way to detect if the gcc compiler in use is safe to use on all configurations without the work around. Note, there exists such a test, but for now we will keep the enabling of jump label as a manual option. Archs that know the compiler is safe with asm goto may choose to select JUMP_LABEL and enable it by default. Reported-by: Ingo Molnar Cause-discovered-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Jason Baron Cc: H. Peter Anvin Cc: David Daney Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Cc: David Miller Cc: Richard Henderson LKML-Reference: <1288028746.3673.11.camel@laptop> Signed-off-by: Steven Rostedt --- arch/Kconfig | 14 ++++++++++++++ arch/x86/Makefile_32.cpu | 13 ++++++++++++- include/linux/jump_label.h | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 53d7f619a1b9..8bf0fa652eb6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -42,6 +42,20 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config JUMP_LABEL + bool "Optimize trace point call sites" + depends on HAVE_ARCH_JUMP_LABEL + help + If it is detected that the compiler has support for "asm goto", + the kernel will compile trace point locations with just a + nop instruction. When trace points are enabled, the nop will + be converted to a jump to the trace function. This technique + lowers overhead and stress on the branch prediction of the + processor. + + On i386, options added to the compiler flags may increase + the size of the kernel slightly. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1255d953c65d..f2ee1abb1df9 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph # tracer assumptions. For i686, generic, core2 this is set by the # compiler anyway -cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) +ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +# Work around to a bug with asm goto with first implementations of it +# in gcc causing gcc to mess up the push and pop of the stack in some +# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 1947a1212678..7880f18e4b86 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,7 +1,7 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) # include # define HAVE_JUMP_LABEL #endif -- cgit v1.2.3 From 337ac9d5218cc19f40fca13fa4deb3c658c4241b Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Fri, 29 Oct 2010 13:50:25 -0700 Subject: phy/marvell: rename 88ec048 to 88e1318s and fix mscr1 addr The marvell 88ec048's official part number is 88e1318s. This patch renames definitions in the driver to reflect this. In addition, a minor bug fix has been added to write back the MSCR1 register value properly. Signed-off-by: Cyril Chemparathy Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 18 +++++++++--------- include/linux/marvell_phy.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e2afdce0a437..f0bd1a1aba3a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -74,8 +74,8 @@ #define MII_88E1121_PHY_MSCR_TX_DELAY BIT(4) #define MII_88E1121_PHY_MSCR_DELAY_MASK (~(0x3 << 4)) -#define MII_88EC048_PHY_MSCR1_REG 16 -#define MII_88EC048_PHY_MSCR1_PAD_ODD BIT(6) +#define MII_88E1318S_PHY_MSCR1_REG 16 +#define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6) #define MII_88E1121_PHY_LED_CTRL 16 #define MII_88E1121_PHY_LED_PAGE 3 @@ -240,7 +240,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev) return err; } -static int m88ec048_config_aneg(struct phy_device *phydev) +static int m88e1318_config_aneg(struct phy_device *phydev) { int err, oldpage, mscr; @@ -251,10 +251,10 @@ static int m88ec048_config_aneg(struct phy_device *phydev) if (err < 0) return err; - mscr = phy_read(phydev, MII_88EC048_PHY_MSCR1_REG); - mscr |= MII_88EC048_PHY_MSCR1_PAD_ODD; + mscr = phy_read(phydev, MII_88E1318S_PHY_MSCR1_REG); + mscr |= MII_88E1318S_PHY_MSCR1_PAD_ODD; - err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr); + err = phy_write(phydev, MII_88E1318S_PHY_MSCR1_REG, mscr); if (err < 0) return err; @@ -659,12 +659,12 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = MARVELL_PHY_ID_88EC048, + .phy_id = MARVELL_PHY_ID_88E1318S, .phy_id_mask = MARVELL_PHY_ID_MASK, - .name = "Marvell 88EC048", + .name = "Marvell 88E1318S", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &m88ec048_config_aneg, + .config_aneg = &m88e1318_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index d0f08018335d..1ff81b51b656 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -12,7 +12,7 @@ #define MARVELL_PHY_ID_88E1121R 0x01410cb0 #define MARVELL_PHY_ID_88E1145 0x01410cd0 #define MARVELL_PHY_ID_88E1240 0x01410e30 -#define MARVELL_PHY_ID_88EC048 0x01410e90 +#define MARVELL_PHY_ID_88E1318S 0x01410e90 /* struct phy_device dev_flags 
definitions */ #define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 -- cgit v1.2.3 From 4882720b267b7b1d1b0ce08334b205f0329d4615 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:34:01 +0000 Subject: semaphore: Remove mutex emulation Semaphores used as mutexes have been deprecated for years. Now that all users are either converted to real semaphores or to mutexes, remove the cruft. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125057.562399240@linutronix.de> --- include/linux/semaphore.h | 6 ------ scripts/checkpatch.pl | 8 ++------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 5310d27abd2a..39fa04966aa8 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -29,9 +29,6 @@ struct semaphore { #define DEFINE_SEMAPHORE(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) -#define DECLARE_MUTEX(name) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) - static inline void sema_init(struct semaphore *sem, int val) { static struct lock_class_key __key; @@ -39,9 +36,6 @@ static inline void sema_init(struct semaphore *sem, int val) lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); } -#define init_MUTEX(sem) sema_init(sem, 1) -#define init_MUTEX_LOCKED(sem) sema_init(sem, 0) - extern void down(struct semaphore *sem); extern int __must_check down_interruptible(struct semaphore *sem); extern int __must_check down_killable(struct semaphore *sem); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 90b54d4697fd..e3c7fc0dca38 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2794,12 +2794,8 @@ sub process { WARN("__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); } -# check for semaphores used as mutexes - if ($line =~ /^.\s*(DECLARE_MUTEX|init_MUTEX)\s*\(/) { - WARN("mutexes are preferred for single holder semaphores\n" . $herecurr); - } -# check for semaphores used as mutexes - if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) { +# check for semaphores initialized locked + if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) { WARN("consider using a completion\n" . $herecurr); } -- cgit v1.2.3 From 3c80fe4ac9cfb13b1bfa4edf1544e8b656716694 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2009 14:19:31 +0000 Subject: audit: Call tty_audit_push_task() outside preempt disabled While auditing all tasklist_lock read_lock sites I stumbled over the following call chain: audit_prepare_user_tty() read_lock(&tasklist_lock); tty_audit_push_task(); mutex_lock(&buf->mutex); --> buf->mutex is locked with preemption disabled. Solve this by acquiring a reference to the task struct under rcu_read_lock and calling tty_audit_push_task outside of the preempt-disabled region. Move all code which needs to be protected by sighand lock into tty_audit_push_task() and use lock/unlock_sighand as we do not hold tasklist_lock.
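In outline, the caller side then takes this shape (a sketch of the reworked audit_prepare_user_tty(), not a verbatim copy of the patch):

    static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
    {
            struct task_struct *tsk;
            int err;

            rcu_read_lock();
            tsk = find_task_by_vpid(pid);
            if (!tsk) {
                    rcu_read_unlock();
                    return -ESRCH;
            }
            get_task_struct(tsk);   /* pin the task before leaving the RCU section */
            rcu_read_unlock();      /* preemption no longer disabled */
            err = tty_audit_push_task(tsk, loginuid, sessionid);  /* may take buf->mutex */
            put_task_struct(tsk);
            return err;
    }

The buffer mutex in tty_audit_push_task() is then taken with preemption enabled and only a task reference held.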
Signed-off-by: Thomas Gleixner Cc: Al Viro Cc: Eric Paris Cc: Oleg Nesterov Signed-off-by: Al Viro --- drivers/char/tty_audit.c | 38 ++++++++++++++++++++++++++++---------- include/linux/tty.h | 9 +++++---- kernel/audit.c | 25 +++++++++---------------- 3 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 1b8ee590b4ca..f64582b0f623 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -188,25 +188,43 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) } /** - * tty_audit_push_task - Flush task's pending audit data + * tty_audit_push_task - Flush task's pending audit data + * @tsk: task pointer + * @loginuid: sender login uid + * @sessionid: sender session id + * + * Called with a ref on @tsk held. Try to lock sighand and get a + * reference to the tty audit buffer if available. + * Flush the buffer or return an appropriate error code. */ -void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) +int tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) { - struct tty_audit_buf *buf; + struct tty_audit_buf *buf = ERR_PTR(-EPERM); + unsigned long flags; - spin_lock_irq(&tsk->sighand->siglock); - buf = tsk->signal->tty_audit_buf; - if (buf) - atomic_inc(&buf->count); - spin_unlock_irq(&tsk->sighand->siglock); - if (!buf) - return; + if (!lock_task_sighand(tsk, &flags)) + return -ESRCH; + + if (tsk->signal->audit_tty) { + buf = tsk->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); + } + unlock_task_sighand(tsk, &flags); + + /* + * Return 0 when signal->audit_tty set + * but tsk->signal->tty_audit_buf == NULL. + */ + if (!buf || IS_ERR(buf)) + return PTR_ERR(buf); mutex_lock(&buf->mutex); tty_audit_buf_push(tsk, loginuid, sessionid, buf); mutex_unlock(&buf->mutex); tty_audit_buf_put(buf); + return 0; } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index e500171c745f..2a754748dd5f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -541,8 +541,8 @@ extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); -extern void tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid); +extern int tty_audit_push_task(struct task_struct *tsk, + uid_t loginuid, u32 sessionid); #else static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) @@ -560,9 +560,10 @@ static inline void tty_audit_fork(struct signal_struct *sig) static inline void tty_audit_push(struct tty_struct *tty) { } -static inline void tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid) +static inline int tty_audit_push_task(struct task_struct *tsk, + uid_t loginuid, u32 sessionid) { + return 0; } #endif diff --git a/kernel/audit.c b/kernel/audit.c index a300931fc45f..8429afea37bf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) struct task_struct *tsk; int err; - read_lock(&tasklist_lock); + rcu_read_lock(); tsk = find_task_by_vpid(pid); - err = -ESRCH; - if (!tsk) - goto out; - err = 0; - - spin_lock_irq(&tsk->sighand->siglock); - if (!tsk->signal->audit_tty) - err = -EPERM; - spin_unlock_irq(&tsk->sighand->siglock); - if (err) - goto out; - - tty_audit_push_task(tsk, loginuid, sessionid); -out: - read_unlock(&tasklist_lock); 
+ if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + get_task_struct(tsk); + rcu_read_unlock(); + err = tty_audit_push_task(tsk, loginuid, sessionid); + put_task_struct(tsk); return err; } -- cgit v1.2.3 From af2951325bd6c26cb2c91943c7b11aed53504056 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Nov 2009 10:09:41 -0500 Subject: audit: make link()/linkat() match "attribute change" predicate Signed-off-by: Al Viro --- include/asm-generic/audit_change_attr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index 50764550a60c..bcbab3e4a3be 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -20,3 +20,7 @@ __NR_chown32, __NR_fchown32, __NR_lchown32, #endif +__NR_link, +#ifdef __NR_linkat +__NR_linkat, +#endif -- cgit v1.2.3 From 120a795da07c9a02221ca23464c28a7c6ad7de1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Oct 2010 02:54:44 -0400 Subject: audit mmap Normal syscall audit doesn't catch the 5th argument of a syscall. It also doesn't catch the contents of userland structures pointed to by a syscall argument, so for both the old and new mmap(2) ABIs it doesn't record the descriptor we are mapping. For the old one it also misses the flags. Signed-off-by: Al Viro --- include/linux/audit.h | 9 +++++++++ kernel/auditsc.c | 16 ++++++++++++++++ mm/mmap.c | 2 ++ mm/nommu.c | 2 ++ 4 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index e24afabc548f..8b5c0620abf9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -102,6 +102,7 @@ #define AUDIT_EOE 1320 /* End of multi-record event */ #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ +#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -478,6 +479,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); +extern void __audit_mmap_fd(int fd, int flags); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -531,6 +533,12 @@ static inline void audit_log_capset(pid_t pid, const struct cred *new, __audit_log_capset(pid, new, old); } +static inline void audit_mmap_fd(int fd, int flags) +{ + if (unlikely(!audit_dummy_context())) + __audit_mmap_fd(fd, flags); +} + extern int audit_n_rules; extern int audit_signals; #else @@ -564,6 +572,7 @@ extern int audit_signals; #define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) #define audit_log_capset(pid, ncr, ocr) ((void)0) +#define audit_mmap_fd(fd, flags) ((void)0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1b31c130d034..f49a0318c2ed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -241,6 +241,10 @@ struct audit_context { pid_t pid; struct audit_cap_data cap; } capset; + struct { + int fd; + int flags; + } mmap; }; int fds[2]; @@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
break; } + case AUDIT_MMAP: { + audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, + context->mmap.flags); + break; } } audit_log_end(ab); } @@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid, context->type = AUDIT_CAPSET; } +void __audit_mmap_fd(int fd, int flags) +{ + struct audit_context *context = current->audit_context; + context->mmap.fd = fd; + context->mmap.flags = flags; + context->type = AUDIT_MMAP; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/mm/mmap.c b/mm/mmap.c index 00161a48a451..b179abb1474a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1108,6 +1109,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long retval = -EBADF; if (!(flags & MAP_ANONYMOUS)) { + audit_mmap_fd(fd, flags); if (unlikely(flags & MAP_HUGETLB)) return -EINVAL; file = fget(fd); diff --git a/mm/nommu.c b/mm/nommu.c index 30b5c20eec15..3613517c7592 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1458,6 +1459,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval = -EBADF; + audit_mmap_fd(fd, flags); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) -- cgit v1.2.3 From 6bff7eccb0d9bdef4123aad5399e73cbc26683a6 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 29 Oct 2010 12:02:17 +0200 Subject: Ensure FMODE_NONOTIFY is not set by userspace In fsnotify_open() ensure that FMODE_NONOTIFY is never set by userspace. Also always call fsnotify_parent and fsnotify. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index ecb43b33d181..5c185fa27089 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -235,10 +235,11 @@ static inline void fsnotify_open(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - if (!(file->f_mode & FMODE_NONOTIFY)) { - fsnotify_parent(path, NULL, mask); - fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); - } + /* FMODE_NONOTIFY must never be set from user */ + file->f_mode &= ~FMODE_NONOTIFY; + + fsnotify_parent(path, NULL, mask); + fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } /* -- cgit v1.2.3 From 05fa3135fdc7b9b510b502a35b6b97d2b38c6f48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 30 Oct 2010 17:31:15 -0400 Subject: locks: fix setlease methods to free passed-in lock We modified setlease to require the caller to allocate the new lease in the case of creating a new lease, but forgot to fix up the filesystem methods. Cc: Steven Whitehouse Cc: Steve French Cc: Trond Myklebust Signed-off-by: J. 
Bruce Fields Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- fs/cifs/cifsfs.c | 5 ++++- fs/gfs2/file.c | 2 ++ fs/locks.c | 3 ++- fs/nfs/file.c | 3 ++- include/linux/fs.h | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 75c4eaa79588..54745b6c3db9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -625,8 +625,11 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) knows that the file won't be changed on the server by anyone else */ return generic_setlease(file, arg, lease); - else + else { + if (arg != F_UNLCK) + locks_free_lock(*lease); return -EAGAIN; + } } struct file_system_type cifs_fs_type = { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index aa996471ec5c..ac943c1307b5 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -629,6 +629,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) { + if (arg != F_UNLCK) + locks_free_lock(*fl); return -EINVAL; } diff --git a/fs/locks.c b/fs/locks.c index 63fbc41cc573..5b526a977882 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -186,7 +186,7 @@ void locks_release_private(struct file_lock *fl) EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ -static void locks_free_lock(struct file_lock *fl) +void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_block)); @@ -195,6 +195,7 @@ static void locks_free_lock(struct file_lock *fl) locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } +EXPORT_SYMBOL(locks_free_lock); void locks_init_lock(struct file_lock *fl) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e756075637b0..1e524fb73ba5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -884,6 +884,7 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, arg); - + if (arg != F_UNLCK) + locks_free_lock(*fl); return -EINVAL; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b7b507ffa1c..1eb29399a4ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1129,6 +1129,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ +void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); -- cgit v1.2.3 From bb8430a2c8fe2b726033017daadf73c69b0348ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 31 Oct 2010 08:35:31 -0400 Subject: locks: remove fl_copy_lock lock_manager operation This one was only used for a nasty hack in nfsd, which has recently been removed. 
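The setlease fixes above come down to a simple contract: for F_RDLCK/F_WRLCK the VFS now hands ->setlease a pre-allocated lock, and a method that does not consume it must free it; F_UNLCK passes no new lock in. A minimal sketch of a method honoring that contract, modeled on the gfs2 and nfs hunks; examplefs_setlease() is an illustrative name for a filesystem that declines all leases:

static int examplefs_setlease(struct file *file, long arg,
			      struct file_lock **fl)
{
	/* the VFS allocated *fl for us; since we fail, we must free it */
	if (arg != F_UNLCK)
		locks_free_lock(*fl);
	return -EINVAL;
}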
Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 -- fs/locks.c | 5 +---- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8a817f656f0a..a91f30890011 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -322,7 +322,6 @@ fl_release_private: yes yes prototypes: int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); /* break_lease callback */ @@ -330,7 +329,6 @@ locking rules: BKL may block fl_compare_owner: yes no fl_notify: yes no -fl_copy_lock: yes no fl_release_private: yes yes fl_break: yes no diff --git a/fs/locks.c b/fs/locks.c index a2ab790471b5..65765cb6afed 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -235,11 +235,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); new->fl_ops = fl->fl_ops; } - if (fl->fl_lmops) { - if (fl->fl_lmops->fl_copy_lock) - fl->fl_lmops->fl_copy_lock(new, fl); + if (fl->fl_lmops) new->fl_lmops = fl->fl_lmops; - } } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 1eb29399a4ff..334d68a17108 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1056,7 +1056,6 @@ struct lock_manager_operations { int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ int (*fl_grant)(struct file_lock *, struct file_lock *, int); - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); int (*fl_mylease)(struct file_lock *, struct file_lock *); -- cgit v1.2.3 From cbf4bd380a9caa72118525eabe7b82c6a3c8da78 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Oct 2010 21:06:59 +0100 Subject: i2c: Drop unused I2C_CLASS_TV flags There are no users left for I2C_CLASS_TV_ANALOG and I2C_CLASS_TV_DIGITAL, so we can get rid of them. Signed-off-by: Jean Delvare --- drivers/staging/tm6000/tm6000-i2c.c | 1 - include/linux/i2c.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/drivers/staging/tm6000/tm6000-i2c.c b/drivers/staging/tm6000/tm6000-i2c.c index 3e46866dd279..93f625fc852b 100644 --- a/drivers/staging/tm6000/tm6000-i2c.c +++ b/drivers/staging/tm6000/tm6000-i2c.c @@ -320,7 +320,6 @@ static struct i2c_algorithm tm6000_algo = { static struct i2c_adapter tm6000_adap_template = { .owner = THIS_MODULE, - .class = I2C_CLASS_TV_ANALOG | I2C_CLASS_TV_DIGITAL, .name = "tm6000", .algo = &tm6000_algo, }; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1f66fa06a97c..889b35abaeda 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -407,8 +407,6 @@ void i2c_unlock_adapter(struct i2c_adapter *); /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... 
*/ -#define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ -#define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ -- cgit v1.2.3 From e30d9859cf08920ae711f57ecd9726804451d29f Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Sun, 31 Oct 2010 21:06:59 +0100 Subject: i2c-i801: Add Intel Patsburg device ID Add support for the Intel Patsburg PCH SMBus Controller. Signed-off-by: Seth Heasley Signed-off-by: Jean Delvare --- Documentation/i2c/busses/i2c-i801 | 3 ++- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 2 ++ include/linux/pci_ids.h | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index e307914a3eda..a417cb13943d 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -15,8 +15,9 @@ Supported adapters: * Intel 82801I (ICH9) * Intel EP80579 (Tolapai) * Intel 82801JI (ICH10) - * Intel 3400/5 Series (PCH) + * Intel 5/3400 Series (PCH) * Intel Cougar Point (PCH) + * Intel Patsburg (PCH) Datasheets: Publicly available at the Intel website Authors: diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c950be3cce21..3a6321cb8030 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -99,6 +99,7 @@ config I2C_I801 ICH10 5/3400 Series (PCH) Cougar Point (PCH) + Patsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 59d65981eed7..1fe30da653c3 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -43,6 +43,7 @@ ICH10 0x3a60 32 hard yes yes yes 5/3400 Series (PCH) 0x3b30 32 hard yes yes yes Cougar Point (PCH) 0x1c22 32 hard yes yes yes + Patsburg (PCH) 0x1d22 32 hard yes yes yes Features supported by this driver: Software PEC no @@ -592,6 +593,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_5) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS) }, { 0, } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 87e2c2e7aed3..c6bcfe93b9ca 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2465,6 +2465,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC 0x1d40 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 -- cgit v1.2.3
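The Patsburg patch above shows how little is needed when a new PCH keeps the same SMBus programming interface: the driver only grows a PCI device-ID table entry, plus the matching constant in pci_ids.h. A minimal sketch of that ID-table mechanism, assuming the usual pci_driver boilerplate around it; example_ids is an illustrative name:

static const struct pci_device_id example_ids[] = {
	/* 0x1d22 is the Patsburg PCH SMBus controller */
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
		     PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS) },
	{ 0, }				/* sentinel */
};
MODULE_DEVICE_TABLE(pci, example_ids);	/* enables module autoloading */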